def main(cfg):
    figs, device, bfig_kwargs = setup(cfg)
    encoder, decoder = load_models(cfg, device)
    frame, photo_cat = load_sdss_data(cfg)

    # FIGURE 1: Autoencoder single galaxy reconstruction
    if 1 in figs:
        make_autoencoder_figure(cfg, bfig_kwargs, device)

    # FIGURE 2: Classification and detection metrics
    if 2 in figs:
        print("INFO: Creating classification and detection metrics from SDSS frame figures...")
        dc_fig = DetectionClassificationFigures(**bfig_kwargs)
        dc_fig.save_figures(frame, photo_cat, encoder, decoder)
        mpl.rc_file_defaults()

    # FIGURE 3: Reconstructions on SDSS
    if 3 in figs:
        print("INFO: Creating reconstructions from SDSS figures...")
        sdss_rec_fig = SDSSReconstructionFigures(cfg.plots.scenes, **bfig_kwargs)
        sdss_rec_fig.save_figures(frame, encoder, decoder)
        mpl.rc_file_defaults()

    if 4 in figs:
        make_blend_sim_figure(cfg, encoder, decoder, bfig_kwargs)

    if not figs.intersection({1, 2, 3, 4}):
        raise NotImplementedError(
            "No figures were created, `cfg.plots.figs` should be a subset of [1,2,3,4]."
        )
def plot_den(sim, snap, num, subdir="", voff=0, box=10, elem="Si", ion=2):
    """Plot density"""
    hspec = get_hspec(sim, snap, snr=20., box=box)
    # Adjust the default plot parameters, which do not scale well in a gridspec.
    matplotlib.rc('xtick', labelsize=10)
    matplotlib.rc('ytick', labelsize=10)
    matplotlib.rc('axes', labelsize=10)
    matplotlib.rc('font', size=8)
    matplotlib.rc('lines', linewidth=1.5)
    gs = gridspec.GridSpec(9, 2)
    ax3 = plt.subplot(gs[0:4, 0])
    plt.sca(ax3)
    xoff = hspec.plot_spectrum(elem, ion, -1, num, flux=False)
    xlim = plt.xlim()
    ax3.xaxis.set_label_position('top')
    ax3.xaxis.tick_top()
    voff += xoff
    ax2 = plt.subplot(gs[5:, 0])
    plt.sca(ax2)
    dxlim = hspec.plot_density(elem, ion, num)
    plt.ylabel(r"n$_\mathrm{" + elem + "II}$ (cm$^{-3}$)")
    plt.ylim(ymin=1e-9)
    ax1 = plt.subplot(gs[4, 0])
    plt.sca(ax1)
    xscale = dxlim * hspec.velfac / xlim[1]
    hspec.plot_den_to_tau(elem, ion, num, thresh=1e-9, xlim=200, voff=voff, xscale=xscale)
    ax1.axes.get_xaxis().set_visible(False)
    plt.xlabel("")
    plt.xlim(xlim)
    sdir = path.join(outdir, "spectra/" + subdir)
    if not path.exists(sdir):
        os.mkdir(sdir)
    save_figure(path.join(sdir, str(num) + "_cosmo" + str(sim) + "_" + elem + "_colden"))
    plt.clf()
    matplotlib.rc_file_defaults()
def run(self):
    if matplotlib is None:
        msg = req_missing(['matplotlib'], 'use the plot directive', optional=True)
        return [nodes.raw('', '<div class="text-error">{0}</div>'.format(msg), format='html')]

    if not self.arguments and not self.content:
        raise self.error('The plot directive needs either an argument or content.')
    if self.arguments and self.content:
        raise self.error('The plot directive needs either an argument or content, not both.')

    if self.arguments:
        plot_path = self.arguments[0]
        with io.open(plot_path, encoding='utf-8') as fd:
            data = fd.read()
    elif self.content:
        data = '\n'.join(self.content)
        plot_path = md5(data.encode('utf-8')).hexdigest()  # md5 needs bytes on Python 3

    # Always reset context
    plt.close('all')
    matplotlib.rc_file_defaults()
    # Run plot
    exec(data)

    out_path = os.path.join(self.out_dir, plot_path + '.svg')
    plot_url = '/' + os.path.join('pyplots', plot_path + '.svg').replace(os.sep, '/')

    figures = [manager.canvas.figure
               for manager in matplotlib._pylab_helpers.Gcf.get_all_fig_managers()]
    for figure in figures:
        makedirs(os.path.dirname(out_path))
        # Yes, if there's more than one, it's overwritten, sucks.
        figure.savefig(out_path, format='svg')

    self.arguments = [plot_url]
    return super(PyPlot, self).run()
def __init__(self, folder, design, gff, output_filename="rnadiff.html", **kwargs):
    """.. rubric:: constructor"""
    super().__init__()
    self.title = "RNAdiff"
    self.independent_module = True
    self.module_command = "--module rnadiff"

    from sequana.rnadiff import RNADiffResults
    self.rnadiff = RNADiffResults(folder, design, gff=gff, **kwargs)

    # nice layout for the report
    import seaborn
    seaborn.set()

    self.create_main_report_content()
    self.create_individual_reports()
    self.create_html(output_filename)

    import matplotlib
    matplotlib.rc_file_defaults()
def back2future():
    """
    Activate matplotlib settings from the default matplotlibrc file.
    """
    print("Activating settings from", mpl.matplotlib_fname())
    mpl.rc_file_defaults()
    mpl.rcParams["axes.titlesize"] = "medium"
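# A minimal usage sketch for `back2future` above; the plotting calls are
# illustrative only. Tweak rcParams for one figure, then restore the defaults
# from the active matplotlibrc file.
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.rcParams["font.size"] = 14   # temporary tweak for a single figure
plt.plot([0, 1], [0, 1])
plt.title("big-font figure")
plt.savefig("big_font.png")

back2future()                    # reload matplotlibrc defaults,
                                 # then pin axes.titlesize back to "medium"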
def plot_correlation_matrix(features, image_save_directory, total_values):
    # Select column values to use in the correlation plot
    feature_plot = list(range(0, 10, 1))
    # Select outcomes to show
    feature_plot.extend([-4, -3, -2, -1])
    print(feature_plot)
    print(total_values.columns[feature_plot])

    # http://benalexkeen.com/correlation-in-python/
    # https://stackoverflow.com/questions/26975089/making-the-labels-of-the-scatterplot-vertical-and-horizontal-in-pandas

    # Check if the matrix is singular
    if np.linalg.cond(total_values.iloc[:, feature_plot]) < 1 / sys.float_info.epsilon:
        m.rc_file_defaults()  # Reset sns
        axs = pd.plotting.scatter_matrix(total_values.iloc[:, feature_plot],
                                         figsize=(15, 15), alpha=0.2, diagonal='kde')
        n = len(features.iloc[:, feature_plot].columns)
        for i in range(n):
            for j in range(n):
                # get the axis of the subplot
                ax = axs[i, j]
                # make the x axis name vertical
                ax.xaxis.label.set_rotation(90)
                # make the y axis name horizontal
                ax.yaxis.label.set_rotation(0)
                # make sure y axis names are outside the plot area
                ax.yaxis.labelpad = 50
        # plt.yticks(rotation=90)
        vis.save_figure(plt.gcf(), image_save_directory=image_save_directory,
                        filename="Scatter-Matrix")
    else:
        warnings.warn("Input matrix is singular and cannot be calculated.")
def make_autoencoder_figure(cfg, bfig_kwargs, device):
    print("INFO: Creating autoencoder figures...")
    autoencoder = instantiate(cfg.models.galaxy_net)
    autoencoder.load_state_dict(torch.load(cfg.models.prior.galaxy_prior.autoencoder_ckpt))
    autoencoder = autoencoder.to(device).eval()

    # generate galsim simulated galaxy images if the file does not exist.
    galaxies_file = Path(cfg.plots.simulated_sdss_individual_galaxies)
    if not galaxies_file.exists() or cfg.plots.overwrite:
        print(f"INFO: Generating individual galaxy images and saving to: {galaxies_file}")
        dataset = instantiate(cfg.datasets.sdss_galaxies, batch_size=512,
                              n_batches=20, num_workers=20)
        imagepath = galaxies_file.parent / (galaxies_file.stem + "_images.png")
        generate.generate(dataset, galaxies_file, imagepath, n_plots=25,
                          global_params=("background", "slen"))

    # create figure classes and plot.
    ae_figures = AEReconstructionFigures(n_examples=5, **bfig_kwargs)
    ae_figures.save_figures(autoencoder, galaxies_file, cfg.plots.psf_file,
                            cfg.plots.sdss_pixel_scale)
    mpl.rc_file_defaults()
def reset_rcparams_to_default():
    """
    Reset the rcParams to the default settings.
    """
    mpl.rcParams.clear()
    mpl.rc_file_defaults()
    set_rcparams(DEFAULT_RCPARAMS)
    # We must keep our backend
    mpl.use(MPL_BACKEND)
def plot_pca(X_scaled, class_labels, image_save_directory, y):
    m.rc_file_defaults()  # Reset sns

    pca_trafo = PCA().fit(X_scaled)
    pca_values = pca_trafo.transform(X_scaled)
    # from adjustText import adjust_text
    targets = np.array(y).flatten()

    fig, ax1 = plt.subplots(figsize=(10, 8))
    plt.semilogy(pca_trafo.explained_variance_ratio_, '--o')
    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
    plt.semilogy(pca_trafo.explained_variance_ratio_.cumsum(), '--o', color='green')
    plt.xlabel("Principal Component")
    plt.ylabel("Explained variance")
    plt.xticks(np.arange(0, len(pca_trafo.explained_variance_ratio_)))
    plt.hlines(0.95, 0, len(pca_trafo.explained_variance_ratio_.cumsum()),
               colors='red', linestyles='solid', label='95% variance covered')
    vis.save_figure(plt.gcf(), image_save_directory=image_save_directory,
                    filename='PCA_Variance_Coverage')

    fig = plt.figure()
    sns.heatmap(np.log(pca_trafo.inverse_transform(np.eye(X_scaled.shape[1]))),
                cmap="hot", cbar=True)
    necessary_components = pca_trafo.explained_variance_ratio_.cumsum()[
        pca_trafo.explained_variance_ratio_.cumsum() < 0.95]
    print("95% variance covered with the {} first components. Values={}".format(
        len(necessary_components), necessary_components))
    vis.save_figure(plt.gcf(), image_save_directory=image_save_directory,
                    filename='PCA_Heatmap')

    plt.figure(figsize=(10, 10))
    # plt.scatter(pca_values[:, 0], pca_values[:, 1], c=targets, edgecolor='none',
    #             label=class_labels.values(), alpha=0.5)
    for i, t in enumerate(set(targets)):
        idx = targets == t
        plt.scatter(pca_values[idx, 0], pca_values[idx, 1],
                    label=class_labels[t], edgecolor='none', alpha=0.5)
    plt.legend(labels=class_labels.values(), bbox_to_anchor=(1, 1))
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    vis.save_figure(plt.gcf(), image_save_directory=image_save_directory,
                    filename='PCA_Plot')
def test_visual():
    for k in range(0, 3):
        fig = plt.figure()
        ax = fig.add_subplot(111)  # The big subplot
        ax.set_aspect('equal', adjustable='box')
        plt.sca(ax)
        pairs_all = pd.DataFrame({'x': pd.Series(np.zeros(500), dtype=int),
                                  'y': pd.Series(np.zeros(500), dtype=int)})
        sns.scatterplot(data=pairs_all, x="x", y="y", ax=ax, s=0.15)
        # plt.legend([], [], frameon=False)
        ax.legend(loc='upper left', markerscale=0.2, bbox_to_anchor=(1.04, 1), fontsize=2)
        ax.set_xlim(0, 100)
        ax.set_ylim(0, 100)
        # ax.set_xticklabels(ax.get_xticklabels(), fontsize=4)
        # ax.set_yticklabels(ax.get_yticklabels(), fontsize=4)
        plt.xticks(np.arange(0, 100 + 1, 5), fontsize=2)
        plt.yticks(np.arange(0, 100 + 1, 5), fontsize=2)
        ax.set_xlabel("")
        ax.set_ylabel("")
        fig.tight_layout()
        output_png = '{}_{}.png'.format('file1', k)
        plt.savefig(output_png, dpi=2400, bbox_inches='tight')
        fig.clear()
        plt.close(fig)

        fig = plt.figure()
        ax_w = fig.add_subplot(111)  # The big subplot
        # ax_w.set_aspect('equal', adjustable='box')
        plt.sca(ax_w)
        sns.set(font_scale=0.5)
        sns.heatmap(np.zeros((100, 100)), cbar_kws={'label': 'Energy'}, ax=ax_w, cmap='plasma')
        ax_w.set_xlim(0, 100)
        plt.xticks(fontsize=3)
        plt.yticks(fontsize=3)
        # ax_w.set_title(description, fontweight='bold', fontsize=4)
        fig.tight_layout()
        output_png = '{}_{}.png'.format('file2', k)
        plt.savefig(output_png, dpi=2400, bbox_inches='tight')
        fig.clear()
        plt.close(fig)
        matplotlib.rc_file_defaults()
def plot_correlation_bar(X_scaled, conf, image_save_directory, y_scaled):
    m.rc_file_defaults()  # Reset sns
    corr = X_scaled.corrwith(y_scaled[conf['Common'].get('class_name')], axis=0)
    corr.sort_values().plot.barh(color='blue', title='Strength of Correlation',
                                 figsize=(10, 25))
    print(corr)
    vis.save_figure(plt.gcf(), image_save_directory=image_save_directory,
                    filename='Correlation_Strength')
def cycle_run_simulation(mat_values, mat_pos, number_of_run, from_zero_to_interval,
                         from_zero_to_noise):
    global dir_res_automatic_sim, mat_rows
    dir_name = dir_res_automatic_sim + str(calendar.timegm(time.gmtime())) + '/'
    check_dir_res(dir_name)
    for index_interval in [1, 4, 8, 12]:
        save_dir = dir_name + 'Interval #' + str(index_interval) + '/'
        res_single_interval = pd.DataFrame(columns=['Noise', 'AVG', 'Total'])
        index_noise = 1
        mat_pos2 = deepcopy(mat_pos)
        mat_for_position = mat_pos2[:, 0:index_interval]
        list_mat_for_sim = []
        for i in range(0, mat_rows):
            for j in range(0, index_interval):
                list_mat_for_sim.append(mat_for_position[i, j])
        for index_noise in range(1, from_zero_to_noise + 1):
            if index_noise >= mat_rows * index_interval:
                break
            else:
                mat_values2 = deepcopy(mat_values)
                mat_for_simulation = mat_values2[:, 0:index_interval]
                avg_real_values, list_avg_mat_modified, result = run_simulation(
                    mat_for_simulation, list_mat_for_sim, number_of_run,
                    index_interval, index_noise)
                res_single_interval = print_single_result(
                    mat_rows * index_interval, index_noise, avg_real_values,
                    list_avg_mat_modified, result,
                    save_dir + 'N(' + str(index_noise) + ')/', True,
                    res_single_interval)
        res_single_interval['Real_AVG'] = avg_real_values
        sns.boxplot(data=res_single_interval, x='Total', y='AVG')
        plt.savefig(save_dir + f'Interval {index_interval} - Total.pdf')
        plt.close()
        del res_single_interval['Total']
        res_single_interval.to_csv(save_dir + f'Interval {index_interval}.csv')
        sns.lineplot(x='Noise', y='value', hue='variable',
                     data=pd.melt(res_single_interval, ['Noise']))
        plt.savefig(save_dir + f'Interval {index_interval} (Line).pdf')
        plt.close()
        plt.rcParams['figure.figsize'][0] = round(index_noise / 2)
        sns.boxplot(data=res_single_interval, x="Noise", y="AVG")
        plt.savefig(save_dir + f'Interval {index_interval} (BoxPlot).pdf')
        plt.close()
        mpl.rc_file_defaults()
def plot_t_sne(X_scaled_subset, y_scaled_subset, class_labels, image_save_directory):
    ### Visualize data with t-SNE

    # Select a random subset to visualize
    import random
    # Reduce the training set with the number of samples randomly chosen
    # X_train_index_subset = sup.get_data_subset_index(1000, X_scaled)
    np.random.seed(0)
    # X_embedded = TSNE(n_components=2, perplexity=5.0, early_exaggeration=12.0,
    #                   n_iter=5000, n_iter_without_progress=1000,
    #                   learning_rate=10).fit_transform(embedded)
    X_embedded = TSNE(n_components=2, perplexity=10.0, early_exaggeration=100.0,
                      n_iter=5000, n_iter_without_progress=1000,
                      learning_rate=10).fit_transform(X_scaled_subset)

    #### Plot t-SNE with best parameters
    m.rc_file_defaults()  # Reset sns

    # Plot with texts added to the graphs
    # from adjustText import adjust_text
    # targets = np.array(y[X_train_index_subset]).flatten()
    plt.figure(figsize=(10, 10))
    texts = []
    if y_scaled_subset is not None and class_labels is not None:
        print("Plot t-SNE with known classes")
        for i, t in enumerate(set(y_scaled_subset)):
            idx = y_scaled_subset == t
            # for x, y in zip(X_embedded[idx, 0], X_embedded[idx, 1]):
            #     texts.append(plt.text(x, y, t))
            plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=class_labels[t])
        # adjust_text(texts, force_points=0.2, force_text=0.2, expand_points=(1, 1),
        #             expand_text=(1, 1),
        #             arrowprops=dict(arrowstyle="-", color='black', lw=0.5))
        plt.legend(bbox_to_anchor=(1, 1))
    else:
        print("Plot t-SNE without known classes")
        plt.scatter(X_embedded[:, 0], X_embedded[:, 1])

    vis.save_figure(plt.gcf(), image_save_directory=image_save_directory,
                    filename='T-SNE_Plot')
def print_pretty_spectra(snapnum, simname):
    """Print pretty versions of spectra from a simulation snapshot"""
    rands = np.random.randint(0, 1000, 20)
    halo = ps.CIVPlot(snapnum, simname, savefile="nr_dla_spectra.hdf5", spec_res=50.)
    offsets = halo.get_offsets()[rands]
    np.savetxt("tau_Rperp_table.txt",
               np.sort(np.vstack((np.arange(0, 1000), halo.get_offsets())).T, 0))
    for i, nn in enumerate(rands):
        gs = gridspec.GridSpec(9, 2)
        axes = (plt.subplot(gs[0:4, 0]), plt.subplot(gs[5:, 0]), plt.subplot(gs[4, 0]))
        # Adjust the default plot parameters, which do not scale well in a gridspec.
        matplotlib.rc('xtick', labelsize=8)
        matplotlib.rc('ytick', labelsize=8)
        matplotlib.rc('axes', labelsize=8)
        matplotlib.rc('font', size=6)
        matplotlib.rc('lines', linewidth=1.5)
        plot_den(halo, axes, nn + 1000, color="red")
        plot_den(halo, axes, nn)
        np.savetxt(str(nn) + "_tau_DLA.txt", halo.get_tau("C", 4, 1548, nn))
        np.savetxt(str(nn) + "_tau_CGM.txt", halo.get_tau("C", 4, 1548, nn + 1000))
        # offsets is indexed by position in rands (20 entries), not by sightline number
        axes[0].text(-500, 0.2, "offset (prop kpc): " + str(offsets[i] * 0.33333 / 0.7))
        odir = path.join(outdir, "spectra")
        save_figure(path.join(odir, str(nn) + "_cosmo" + "_CIV_spec"))
        plt.clf()
        matplotlib.rc_file_defaults()
def run_colocation(
        sp_data,
        n_neighbours=None,
        model_name='CoLocatedGroupsSklearnNMF',
        verbose=True,
        return_all=True,
        train_args={'n_fact': [30], 'n_iter': 20000, 'sample_name_col': None,
                    'mode': 'normal', 'n_type': 'restart', 'n_restarts': 5},
        model_kwargs={'init': 'random', 'random_state': 0,
                      'nmf_kwd_args': {'tol': 0.00001}},
        posterior_args={},
        export_args={'path': "./results", 'run_name_suffix': '', 'top_n': 10}):
    r"""Run co-located cell type combination model: train for the specified numbers
    of factors, evaluate the stability, save and export results, and save
    diagnostic plots.

    Parameters
    ----------
    sp_data:
        Anndata object with cell2location model output in .uns['mod'].
        Alternatively np.ndarray with cell density parameters.
        Alternatively pd.DataFrame with cell density parameters
        (with location and cell type names).

    Returns
    -------
    dict
        dictionary {'mod', 'sc_data', 'model_name', 'train_args', 'posterior_args',
        'export_args', 'run_name', 'run_time'}

    A minimal invocation sketch follows this function.
    """

    # set default parameters
    d_train_args = {
        'n_fact': [30], 'n_iter': 20000, 'learning_rate': 0.01, 'use_cuda': False,
        'sample_prior': False, 'posterior_summary': 'post_sample_q05',
        'sample_name_col': None, 'mode': 'normal', 'n_type': 'restart',
        'n_restarts': 5, 'include_source_location': False
    }
    d_posterior_args = {
        'n_samples': 1000, 'evaluate_stability_align': False,
        'evaluate_stability_transpose': True, 'mean_field_slot': "init_1"
    }
    d_export_args = {
        'path': "./results", 'plot_extension': "pdf",
        'scanpy_coords_name': 'spatial', 'scanpy_plot_vmax': 'p99.2',
        'scanpy_plot_size': 1.3, 'save_model': True, 'run_name_suffix': '',
        'export_q05': False, 'plot_histology': False, 'top_n': 10
    }

    # replace defaults with parameters supplied
    for k in train_args.keys():
        d_train_args[k] = train_args[k]
    train_args = d_train_args
    for k in posterior_args.keys():
        d_posterior_args[k] = posterior_args[k]
    posterior_args = d_posterior_args
    for k in export_args.keys():
        d_export_args[k] = export_args[k]
    export_args = d_export_args

    # start timing
    start = time.time()

    sp_data = sp_data.copy()

    # import the specified version of the model
    if type(model_name) is str:
        import cell2location.models as models
        Model = getattr(models, model_name)
    else:
        Model = model_name

    ####### Preparing data #######
    # extract cell density parameter
    X_data = sp_data.uns['mod'][train_args['posterior_summary']]['spot_factors']
    var_names = sp_data.uns['mod']['fact_names']
    obs_names = sp_data.obs_names

    if train_args['sample_name_col'] is None:
        # if slots needed to generate scanpy plots are present, use scanpy spatial slot name:
        sc_spatial_present = np.isin(list(sp_data.uns.keys()), ['spatial'])[0]
        if sc_spatial_present:
            sp_data.obs['sample'] = list(sp_data.uns['spatial'].keys())[0]
        else:
            sp_data.obs['sample'] = 'sample'
        train_args['sample_name_col'] = 'sample'

    sample_id = sp_data.obs[train_args['sample_name_col']]

    if n_neighbours is not None:
        neighbours = spatial_neighbours(
            coords=sp_data.obsm['spatial'], n_sp_neighbors=n_neighbours, radius=None,
            include_source_location=train_args['include_source_location'],
            sample_id=sample_id)
        neighbours_sum = sum_neighbours(X_data, neighbours)
        X_data = np.concatenate([X_data, neighbours_sum], axis=1)
        var_names = list(var_names) + ['neigh_' + i for i in var_names]

    res_dict = {}

    for n_fact in train_args['n_fact']:
        ####### Creating model #######
        if verbose:
            print('### Creating model ### - time ' +
                  str(np.around((time.time() - start) / 60, 2)) + ' min')

        # create model class
        n_fact = int(n_fact)
        mod = Model(n_fact, X_data, n_iter=train_args['n_iter'], verbose=verbose,
                    var_names=var_names, obs_names=obs_names,
                    fact_names=['fact_' + str(i) for i in range(n_fact)],
                    sample_id=sample_id, **model_kwargs)

        ####### Print run name #######
        run_name = str(mod.__class__.__name__) + '_' + str(mod.n_fact) + 'combinations_' \
                   + str(mod.n_obs) + 'locations_' + str(mod.n_var) + 'factors' \
                   + export_args['run_name_suffix']
        path_name = str(mod.__class__.__name__) + '_' \
                    + str(mod.n_obs) + 'locations_' + str(mod.n_var) + 'factors' \
                    + export_args['run_name_suffix']

        print('### Analysis name: ' + run_name)  # analysis name is always printed

        # create the export directory
        path = export_args['path'] + path_name + '/'
        if not os.path.exists(path):
            os.makedirs(os.path.abspath(path))

        ####### Sampling prior #######
        if train_args['sample_prior']:
            raise ValueError('Sampling prior not implemented yet')

        ####### Training model #######
        if verbose:
            print('### Training model ###')
        if train_args['mode'] == 'normal':
            mod.fit(n=train_args['n_restarts'], n_type=train_args['n_type'])
        elif train_args['mode'] == 'tracking':
            raise ValueError('tracking training not implemented yet')
        else:
            raise ValueError("train_args['mode'] can be only 'normal' or 'tracking'")

        ####### Evaluate stability of training #######
        fig_path = path + 'stability_plots/'
        if not os.path.exists(fig_path):
            mkdir(fig_path)

        if train_args['n_restarts'] > 1:
            n_plots = train_args['n_restarts'] - 1
            ncol = int(np.min((n_plots, 3)))
            nrow = np.ceil(n_plots / ncol)
            plt.figure(figsize=(5 * nrow, 5 * ncol))
            mod.evaluate_stability('cell_type_factors',
                                   n_samples=posterior_args['n_samples'],
                                   align=posterior_args['evaluate_stability_align'])
            plt.tight_layout()
            save_plot(fig_path, filename=f'cell_type_factors_n_fact{mod.n_fact}',
                      extension=export_args['plot_extension'])
            if verbose:
                plt.show()
            plt.close()

            plt.figure(figsize=(5 * nrow, 5 * ncol))
            mod.evaluate_stability('location_factors',
                                   n_samples=posterior_args['n_samples'],
                                   align=posterior_args['evaluate_stability_align'])
            plt.tight_layout()
            save_plot(fig_path, filename=f'location_factors_n_fact{mod.n_fact}',
                      extension=export_args['plot_extension'])
            if verbose:
                plt.show()
            plt.close()

        ####### Evaluating parameters / sampling posterior #######
        if verbose:
            print(f'### Evaluating parameters / sampling posterior ### - time '
                  f'{np.around((time.time() - start) / 60, 2)} min')

        # extract all parameters from parameter store or sample posterior
        mod.sample_posterior(node='all', n_samples=posterior_args['n_samples'],
                             save_samples=False,
                             mean_field_slot=posterior_args['mean_field_slot'])

        # evaluate predictive accuracy of the model
        mod.compute_expected()

        # Plot predictive accuracy
        fig_path = path + 'predictive_accuracy/'
        if not os.path.exists(fig_path):
            mkdir(fig_path)
        try:
            plt.figure(figsize=(5.5, 5.5))
            mod.plot_posterior_mu_vs_data()
            plt.tight_layout()
            save_plot(fig_path, filename=f'data_vs_posterior_mean_n_fact{mod.n_fact}',
                      extension=export_args['plot_extension'])
            if verbose:
                plt.show()
            plt.close()
        except Exception as e:
            print('Some error in plotting `mod.plot_posterior_mu_vs_data()`\n ' + str(e))

        ####### Export summarised posterior & saving results #######
        if verbose:
            print('### Saving results ###')

        # extract parameters into DataFrames
        mod.sample2df(node_name='nUMI_factors', ct_node_name='cell_type_factors')

        # export results to scanpy object
        sp_data = mod.annotate_adata(sp_data)  # as columns to .obs
        sp_data = mod.export2adata(sp_data,
                                   slot_name=f'mod_coloc_n_fact{mod.n_fact}')  # as a slot in .uns

        # print the fraction of cells of each type located to each combination
        ct_loadings = mod.print_gene_loadings(loadings_attr='cell_type_fractions',
                                              gene_fact_name='cell_type_fractions',
                                              top_n=export_args['top_n'])

        # save
        save_path = path + 'factor_markers/'
        if not os.path.exists(save_path):
            mkdir(save_path)
        ct_loadings.to_csv(f'{save_path}n_fact{mod.n_fact}.csv')

        save_path = path + 'location_factors_mean/'
        if not os.path.exists(save_path):
            mkdir(save_path)
        mod.location_factors_df.to_csv(f'{save_path}n_fact{mod.n_fact}.csv')

        save_path = path + 'cell_type_fractions_mean/'
        if not os.path.exists(save_path):
            mkdir(save_path)
        mod.cell_type_fractions.to_csv(f'{save_path}n_fact{mod.n_fact}.csv')

        if export_args['export_q05']:
            save_path = path + 'q05_param/'
            if not os.path.exists(save_path):
                mkdir(save_path)
            mod.location_factors_q05.to_csv(f'{path}location_factors_q05.csv')
            mod.cell_type_fractions_q05.to_csv(f'{path}cell_type_fractions_q05.csv')

        # A convenient way to explore the composition of cell type combinations /
        # microenvironments is by using a heatmap:
        # make nice names
        mod.cell_type_fractions.columns = [sub('mean_cell_type_factors', '', i)
                                           for i in mod.cell_type_fractions.columns]

        fig_path = path + 'cell_type_fractions_heatmap/'
        if not os.path.exists(fig_path):
            mkdir(fig_path)
        # plot co-occurring cell type combinations
        mod.plot_gene_loadings(mod.var_names_read, mod.var_names_read,
                               fact_filt=mod.fact_filt,
                               loadings_attr='cell_type_fractions',
                               gene_fact_name='cell_type_fractions',
                               cmap='RdPu',
                               figsize=[5 + 0.12 * mod.n_fact, 5 + 0.1 * mod.n_var])
        save_plot(fig_path, filename=f'n_fact{mod.n_fact}',
                  extension=export_args['plot_extension'])
        if verbose:
            plt.show()
        plt.close()

        ####### Plotting posterior of W / cell locations #######
        # Finally we need to examine where in the tissue each cell type
        # combination / microenvironment is located
        rcParams["figure.figsize"] = [5, 6]
        rcParams["axes.facecolor"] = "black"
        if verbose:
            print('### Plotting cell combinations in 2D ###')

        data_samples = sp_data.obs[train_args['sample_name_col']].unique()
        cluster_plot_names = mod.location_factors_df.columns
        fig_path = path + 'spatial/'

        try:
            for s in data_samples:
                # if slots needed to generate scanpy plots are present, use scanpy:
                sc_spatial_present = np.any(np.isin(list(sp_data.uns.keys()), ['spatial']))
                if sc_spatial_present:
                    sc.settings.figdir = fig_path
                    s_ind = sp_data.obs[train_args['sample_name_col']] == s
                    s_keys = list(sp_data.uns['spatial'].keys())
                    s_spatial = np.array(s_keys)[[s in i for i in s_keys]][0]

                    # plot cell density in each combination
                    sc.pl.spatial(
                        sp_data[s_ind, :], cmap='magma', library_id=s_spatial,
                        color=cluster_plot_names, ncols=6,
                        size=export_args['scanpy_plot_size'],
                        img_key='hires', alpha_img=0,
                        vmin=0, vmax=export_args['scanpy_plot_vmax'],
                        save=f"cell_density_mean_n_fact{mod.n_fact}_s{s}"
                             f"_{export_args['scanpy_plot_vmax']}.{export_args['plot_extension']}",
                        show=False)
                    if export_args['plot_histology']:
                        sc.pl.spatial(
                            sp_data[s_ind, :], cmap='magma', library_id=s_spatial,
                            color=cluster_plot_names, ncols=6,
                            size=export_args['scanpy_plot_size'],
                            img_key='hires', alpha_img=1,
                            vmin=0, vmax=export_args['scanpy_plot_vmax'],
                            save=f"cell_density_mean_n_fact{mod.n_fact}_s{s}"
                                 f"_{export_args['scanpy_plot_vmax']}.{export_args['plot_extension']}",
                            show=False)
                else:
                    # if coordinates exist plot
                    if export_args['scanpy_coords_name'] is not None:
                        # move spatial coordinates to obs for compatibility with our plotter
                        sp_data.obs['imagecol'] = sp_data.obsm[export_args['scanpy_coords_name']][:, 0]
                        sp_data.obs['imagerow'] = sp_data.obsm[export_args['scanpy_coords_name']][:, 1]

                        p = c2lpl.plot_factor_spatial(
                            adata=sp_data,
                            fact_ind=np.arange(mod.location_factors_df.shape[1]),
                            fact=mod.location_factors_df,
                            cluster_names=cluster_plot_names,
                            n_columns=6, trans='log10', max_col=100,
                            col_breaks=[0, 1, 10, 20, 50],
                            sample_name=s,
                            samples_col=train_args['sample_name_col'],
                            obs_x='imagecol', obs_y='imagerow')
                        p.save(filename=fig_path +
                               f"cell_density_mean_n_fact{mod.n_fact}_s{s}"
                               f".{export_args['plot_extension']}")
        except Exception as e:
            print('Some error in plotting with scanpy or '
                  '`cell2location.plt.plot_factor_spatial()`\n ' + str(e))

        rcParams["axes.facecolor"] = "white"
        matplotlib.rc_file_defaults()

        # save model object and related annotations
        save_path = path + 'models/'
        if not os.path.exists(save_path):
            mkdir(save_path)
        if export_args['save_model']:
            # save the model and other objects
            res_dict_1 = {'mod': mod, 'model_name': model_name,
                          'train_args': train_args, 'posterior_args': posterior_args,
                          'export_args': export_args, 'run_name': run_name,
                          'run_time': str(np.around((time.time() - start) / 60, 2)) + ' min'}
        else:
            # just save the settings
            res_dict_1 = {'model_name': model_name,
                          'train_args': train_args, 'posterior_args': posterior_args,
                          'export_args': export_args, 'run_name': run_name,
                          'run_time': str(np.around((time.time() - start) / 60, 2)) + ' min'}
        pickle.dump(res_dict_1,
                    file=open(save_path + f'model_n_fact{mod.n_fact}.p', "wb"))

        res_dict[f'n_fact{mod.n_fact}'] = res_dict_1

        if verbose:
            print('### Done ### - time ' + res_dict_1['run_time'])

    save_path = path + 'anndata/'
    if not os.path.exists(save_path):
        mkdir(save_path)
    # save anndata with exported posterior
    sp_data.write(filename=f'{save_path}sp.h5ad', compression='gzip')

    if return_all:
        return res_dict, sp_data
    else:
        del res_dict
        del res_dict_1
        del mod
        gc.collect()
        return str((time.time() - start) / 60) + ' min'
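# A minimal invocation sketch for `run_colocation` above. `adata_vis` is a
# hypothetical AnnData object carrying cell2location output in .uns['mod'];
# every argument shown maps onto a default defined in the function signature.
res_dict, adata_vis = run_colocation(
    adata_vis,
    model_name='CoLocatedGroupsSklearnNMF',
    train_args={
        'n_fact': [10, 20],           # train separate models with 10 and 20 factors
        'sample_name_col': 'sample',  # .obs column holding sample names
        'n_restarts': 3,              # stability is evaluated across restarts
    },
    export_args={'path': './results/colocation/'},
)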
import numpy as np
import tensorflow as tf
import matplotlib as mpl

from tqdm import tqdm

from absl import app
from absl import flags

from matplotlib import pyplot as plt
from matplotlib import gridspec as grid

import includes.visualization as visualization
from includes.utils import load_data, Dataset

mpl.rc_file_defaults()

tf.logging.set_verbosity(tf.logging.ERROR)

FLAGS = flags.FLAGS

flags.DEFINE_string("model", "gumbolt-vae", "Model to use [vae, dvae, gumbolt-vae, gvae]")
flags.DEFINE_string("datagroup", "mnist", "Datagroup to use [mnist, spiral, graph]")
flags.DEFINE_string("dataset", "static",
                    "Dataset to use {mnist: [static], spiral: [normal], graph: [citeseer]}")
flags.DEFINE_integer("latent_dim", 10, "Number of dimensions for latent variable Z")
def clear_state(plot_rcparams):
    plt.close('all')
    if hasattr(matplotlib, 'rc_file_defaults'):
        matplotlib.rc_file_defaults()
    if hasattr(matplotlib, 'rcParams'):
        matplotlib.rcParams.update(plot_rcparams)
def set_rc_params(params):
    # Reset params from rc file if matplotlib installation supports it.
    if hasattr(matplotlib, 'rc_file_defaults'):
        matplotlib.rc_file_defaults()
    if params:
        matplotlib.rcParams.update(params)
def analyse_features(features, y, class_labels, conf, image_save_directory):
    '''
    Feature visualization and analysis: feature selection and visualization of the
    dataset through t-SNE, and feature visualization and analysis through PCA.
    '''

    ### Standardize data for feature selection and visualization
    # Z-normalize the data around zero, divided by the standard deviation. Fit the
    # normalizer on the training data and transform the training and the test data.
    # The scaler must depend only on the training data, in order to prevent leakage
    # of information from the test data.

    # === Remove columns that are all the same === #
    features_reduced = features.loc[:, np.invert(unique_cols(features))]
    print("Reduce columns that are duplicated in terms of values.")
    features = features_reduced

    # === Select the best type of scaler === #
    X_scaled = rescale_features(features)

    if y is not None:
        y_scaled = rescale_outcomes(conf, features, y)
        print("Merged features and outcomes to use in correlation matrix")
        total_values_scaled = X_scaled.join(y_scaled)
        plot_correlation_matrix2(conf, image_save_directory, total_values_scaled)
        plot_correlation_bar(X_scaled, conf, image_save_directory, y_scaled)
    else:
        total_values_scaled = X_scaled
        print("Only features will be used for correlations.")

    ### Feature and outcome correlation matrix
    plot_correlation_matrix(features, image_save_directory, total_values_scaled)

    # FIXME: class names not shown correctly
    plot_spearman_correlation_matrix(image_save_directory, total_values_scaled)

    # from tabulate import tabulate
    # print(tabulate(X_scaled, headers='keys', tablefmt='psql'))

    try:
        plot_hierarchical_linkage(X_scaled, conf, image_save_directory)
    except:
        warnings.warn("Cannot execute hierarchical linkage")
        traceback.print_exc()

    ### Feature visualization with parallel coordinates
    # Select a random subset to visualize
    import random
    # Reduce the training set with the number of samples randomly chosen
    X_train_index_subset = sup.get_random_data_subset_index(1000, features)
    X_train_scaled_subset = X_scaled.iloc[X_train_index_subset, :]

    if y is not None:
        df_y = y_scaled = pd.DataFrame(data=y.reshape(-1, 1), index=features.index,
                                       columns=[conf['Common'].get('class_name')])
        total_values = features.join(df_y)
        print("Merged features and outcomes to use in correlation matrix unscaled")
        y_train_subset = np.array(y[X_train_index_subset]).flatten()

        # Select column values to use in the correlation plot
        feature_plot = list(range(0, 10, 1))
        # cols = ['MA2Norm', 'MA50Norm', 'MA200Norm', 'MA400Norm', 'MA200NormDiff', 'MA400NormDiff']
        cols = total_values.columns[feature_plot]
        print(feature_plot)
        print(cols)
        comparison_name = conf['Common'].get('class_name')
        print("Class name: ", comparison_name)
        df_fv = total_values.iloc[X_train_index_subset, :]

        # Use parallel coordinates to visualize the classes and all features
        # https://plot.ly/python/parallel-coordinates-plot/
        # http://benalexkeen.com/parallel-coordinates-in-matplotlib/
        m.rc_file_defaults()  # Reset sns
        colors = ['#2e8ad8', '#cd3785', '#c64c00', '#889a00']
        plot_parallel_coordinates(df_fv, cols, colors, comparison_name, conf,
                                  image_save_directory)
        # plot_parallel_coordinates(X_train_index_subset, cols, comparison_name, total_values)
    else:
        y_train_subset = None
        warnings.warn("No y value. Parallel coordinates will not be calculated.")

    #### t-SNE parameter grid search
    # calibrate_tsne = False
    # if calibrate_tsne:
    #     find_tsne_parmeters(X_train_scaled_subset, y_train_scaled_subset, class_labels)

    # t-SNE plot
    plot_t_sne(X_train_scaled_subset, y_train_subset, class_labels, image_save_directory)

    ### UMAP cluster analysis
    plot_umap(X_scaled, class_labels, image_save_directory, y)

    ### PCA analysis
    try:
        plot_pca(X_scaled, class_labels, image_save_directory, y)
    except:
        warnings.warn("Cannot execute PCA")
        traceback.print_exc()
def model_performance(y_true, y_preds, artifact_path, artifact_ext, threshold,
                      show_plots=False):
    matplotlib.rc_file_defaults()
    y_preds_bin = np.where(y_preds > threshold, 1, 0)

    # ROC curve
    plt.clf()
    plt.close()
    plt.figure()
    false_positive_rate, recall, thresholds = roc_curve(y_true, y_preds)
    roc_auc = auc(false_positive_rate, recall)
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.plot(false_positive_rate, recall, 'b', label='AUC = %0.3f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.ylabel('Recall')
    plt.xlabel('Fall-out (1 - Specificity)')
    plt.savefig(os.path.join(artifact_path, f'roc_curve_{artifact_ext}.png'),
                bbox_inches="tight")
    if show_plots:
        plt.show()
    print('auc score:', roc_auc)

    # Classification report
    print(classification_report(y_true, y_preds_bin))

    # Confusion matrix
    plt.clf()
    plt.close()
    plt.figure()
    cm = confusion_matrix(y_true, y_preds_bin)
    labels = ['0', '1']
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, xticklabels=labels, yticklabels=labels, annot=True, fmt='d',
                cmap="Blues", vmin=0.2)
    plt.title('Confusion Matrix')
    plt.ylabel('True Class')
    plt.xlabel('Predicted Class')
    plt.savefig(os.path.join(artifact_path, f'confusion_matrix_{artifact_ext}.png'),
                bbox_inches="tight")
    if show_plots:
        plt.show()

    # Probability histogram
    plt.clf()
    prob_hist = pd.Series(y_preds).hist(cumulative=True, bins=20)
    prob_hist.get_figure().savefig(
        os.path.join(artifact_path, f'hist_plot_{artifact_ext}.png'),
        bbox_inches="tight")

    return classification_report(y_true, y_preds_bin, output_dict=True)


# still need to test it...
# def feature_importance():
#     shap_values = shap.TreeExplainer(bst).shap_values(X_val)
#     shap.summary_plot(shap_values[1], X_val)
def analyze_timegraph(source, features, y, conf, image_save_directory):
    '''
    Autocorrelation analysis of the time series.
    Source: https://machinelearningmastery.com/gentle-introduction-autocorrelation-partial-autocorrelation/
    '''
    from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
    from statsmodels.stats.diagnostic import acorr_ljungbox

    #### Autoregression intuition
    # Consider a time series that was generated by an autoregression (AR) process
    # with a lag of k. We know that the ACF describes the autocorrelation between an
    # observation and another observation at a prior time step that includes direct
    # and indirect dependence information. This means we would expect the ACF for
    # the AR(k) time series to be strong to a lag of k, and the inertia of that
    # relationship would carry on to subsequent lag values, trailing off at some
    # point as the effect is weakened. We know that the PACF only describes the
    # direct relationship between an observation and its lag. This suggests that
    # there would be no correlation for lag values beyond k. This is exactly the
    # expectation of the ACF and PACF plots for an AR(k) process.

    #### Moving average intuition
    # Consider a time series that was generated by a moving average (MA) process
    # with a lag of k. Remember that the moving average process is an autoregression
    # model of the time series of residual errors from prior predictions. Another
    # way to think about the moving average model is that it corrects future
    # forecasts based on errors made on recent forecasts. We would expect the ACF
    # for the MA(k) process to show a strong correlation with recent values up to
    # the lag of k, then a sharp decline to low or no correlation. By definition,
    # this is how the process was generated. For the PACF, we would expect the plot
    # to show a strong relationship to the lag and a trailing off of correlation
    # from the lag onwards. Again, this is exactly the expectation of the ACF and
    # PACF plots for an MA(k) process.
    # (A self-contained AR(1) demonstration of this intuition follows this function.)

    # If the autocorrelation function has a very long tail, the process is not stationary.

    m.rc_file_defaults()  # Reset sns

    # Here, the time graph is selected
    print("Plot the total autocorrelation of the price. The dark blue values are the "
          "correlation of the price with the lag. The light blue cone is the confidence "
          "interval. If the correlation is > cone, the value is significant.")

    vis.plot_autocorrelation(np.log(source['Close']), "OMXS30", mode="acf", lags=None,
                             xlim=None, ylim=None,
                             image_save_directory=image_save_directory)
    vis.plot_autocorrelation(np.log(source['Close']), "OMXS30_700_first", mode="acf",
                             lags=None, xlim=[0, 700], ylim=None,
                             image_save_directory=image_save_directory)
    vis.plot_autocorrelation(np.log(source['Close']), "OMXS30", mode="pacf", lags=200,
                             xlim=None, ylim=None,
                             image_save_directory=image_save_directory)
    vis.plot_autocorrelation(np.log(source['Close']), "OMXS30_first_10", mode="pacf",
                             lags=50, xlim=[0, 10], ylim=None,
                             image_save_directory=image_save_directory)
    vis.plot_autocorrelation(features.MA200Norm, "OMXS30_MA200", mode="acf", lags=None,
                             xlim=None, ylim=None,
                             image_save_directory=image_save_directory)
    vis.plot_autocorrelation(features.MA200Norm, "OMXS30_MA200_first_200", mode="acf",
                             lags=None, xlim=[0, 200], ylim=None,
                             image_save_directory=image_save_directory)
    vis.plot_autocorrelation(features.MA200Norm, "OMXS30_MA200", mode="pacf", lags=200,
                             xlim=None, ylim=None,
                             image_save_directory=image_save_directory)

    # Plot differences between time values to see if the differences are stationary
    diff = pd.DataFrame(data=np.divide(source['Close'] - source['Close'].shift(1),
                                       source['Close'])).set_index(source['Date'])
    diff = diff.iloc[1:, :]

    fig = plt.figure(figsize=(15, 4))
    plt.plot(source['Date'].iloc[1:], diff)
    plt.grid()

    vis.plot_autocorrelation(diff, "OMXS30_difference", mode="acf", lags=None,
                             xlim=[0, 50], ylim=[-0.2, 0.2],
                             image_save_directory=image_save_directory)
    vis.plot_autocorrelation(diff, "OMXS30_difference", mode="pacf", lags=100,
                             xlim=[0, 50], ylim=[-0.2, 0.2],
                             image_save_directory=image_save_directory)

    # Plot temporal correlation
    X_scaled, y_scaled = rescale(conf, features, y)

    # Correlation graphs for temporal correlation
    vis.plot_temporal_correlation_feature(X_scaled, conf['Common'].get('dataset_name'),
                                          image_save_directory, source, y_scaled)
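# A self-contained illustration of the AR(k) intuition described in
# `analyze_timegraph` above, on synthetic data (not part of the OMXS30
# pipeline): for an AR(1) series the ACF tails off geometrically while the
# PACF cuts off sharply after lag 1.
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

rng = np.random.default_rng(0)
n = 1000
x = np.zeros(n)
for t in range(1, n):
    x[t] = 0.8 * x[t - 1] + rng.normal()  # AR(1) with phi = 0.8

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 3))
plot_acf(x, lags=30, ax=ax1)   # trails off over many lags
plot_pacf(x, lags=30, ax=ax2)  # significant only at lag 1
plt.show()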
def perform_feature_selection_algorithms(features, y, conf, image_save_directory):
    '''
    Perform feature selection.
    '''

    # Scale; use this scaler also for the test data at the end
    X_scaled = pd.DataFrame(data=StandardScaler().fit(features).transform(features),
                            index=features.index, columns=features.columns)

    # Reduce the training set with the number of samples randomly chosen
    X_train_index_subset = sup.get_random_data_subset_index(1000, X_scaled)
    print("Scaled data with standard scaler.")
    print("Get a subset of 1000 samples.")

    relevantFeatureList = []
    selected_feature_list = pd.DataFrame()

    # Predict with logistic regression

    ### Lasso feature selection
    m.rc_file_defaults()  # Reset sns
    coefList = execute_lasso_feature_selection(X_scaled, y, conf, image_save_directory)
    selected_feature_list = selected_feature_list.append(
        pd.Series(name='Lasso', data=coefList))
    relevantFeatureList.extend(coefList)
    print("Prediction of training data with logistic regression: {0:.2f}".format(
        predict_features_simple(X_scaled[coefList], y)))

    ### Tree-based feature selection
    treecoefList = execute_treebased_feature_selection(X_scaled, y, conf,
                                                       image_save_directory)
    selected_feature_list = selected_feature_list.append(
        pd.Series(name='Tree', data=treecoefList))
    relevantFeatureList.extend(treecoefList)
    print("Prediction of training data with logistic regression: {0:.2f}".format(
        predict_features_simple(X_scaled[treecoefList], y)))

    ### Backward elimination (wrapper method)
    selected_features_BE = execute_backwardelimination_feature_selection(X_scaled, y)
    relevantFeatureList.extend(selected_features_BE)
    selected_feature_list = selected_feature_list.append(
        pd.Series(name='Backward_Elimination', data=selected_features_BE))
    print("Prediction of training data with logistic regression: {0:.2f}".format(
        predict_features_simple(X_scaled[selected_features_BE], y)))

    ### Recursive elimination with logistic regression
    # Wrapper method; feature ranking with recursive feature elimination
    relevant_features, rfe_coef = execute_recursive_elimination_feature_selection(
        X_scaled.iloc[X_train_index_subset], y[X_train_index_subset])
    relevantFeatureList.extend(relevant_features)

    step_size = np.round(len(X_scaled.columns) / 4, 0).astype(int)
    for i in range(step_size, len(X_scaled.columns), step_size):
        selected_feature_list = selected_feature_list.append(
            pd.Series(name='RecursiveTop' + str(i),
                      data=rfe_coef.iloc[0:i - 1]).reset_index()['RecursiveTop' + str(i)])
        print('Created RecursiveTop{}'.format(str(i)))

    ### Add the top columns from all methods
    top_feature_cols = create_feature_list_from_top_features(relevantFeatureList)
    selected_feature_list = selected_feature_list.append(
        pd.Series(name='Manual', data=top_feature_cols))

    ### Add all columns
    selected_feature_list = selected_feature_list.append(
        pd.Series(name='All', data=X_scaled.columns))

    return selected_feature_list
def clear_state(plot_rcparams, close=True):
    if close:
        plt.close("all")
    matplotlib.rc_file_defaults()
    matplotlib.rcParams.update(plot_rcparams)
from scipy.signal import argrelextrema

from bisip import invResults as iR
from bisip.utils import format_results, get_data
from bisip.utils import split_filepath, get_model_type
from bisip.utils import var_depth, flatten

try:
    import lib_dd.decomposition.ccd_single as ccd_single
    import lib_dd.config.cfg_single as cfg_single
    print("\nCCDtools available")
except ImportError:
    pass

import matplotlib as mpl
mpl.rc_file_defaults()


#==============================================================================
# Function to run MCMC simulation on selected model
# Arguments: model <function>, mcmc parameters <dict>, traces path <string>
def run_MCMC(function, mc_p, save_traces=False, save_where=None):
    print("\nMCMC parameters:\n", mc_p)
    if save_traces:
        # If the path doesn't exist, create it
        if not path.exists(save_where):
            makedirs(save_where)
        MDL = pymc.MCMC(function, db='txt', dbname=save_where)
    else:
        MDL = pymc.MCMC(function, db='ram', dbname=save_where)
numSamples = features_raw.shape[0]
print("Number of samples={}".format(numSamples))

# Get number of features
numFeatures = features_raw.shape[1]
print("Number of features={}".format(numFeatures))

# Get the number of classes for the supervised learning
numClasses = outcomes_raw[conf['class_name']].value_counts().shape[0]
print("Number of classes={}".format(numClasses))


# ## Analyse and Transform time series

# In[17]:

m.rc_file_defaults()  # Reset sns

datatitle = conf['dataset_name']

plt.figure(num=None, figsize=(12.5, 7), dpi=80, facecolor='w', edgecolor='k')
plt.plot(source['Date'], source['Close'])  # to get a scatter plot, add 'o' as the last parameter
plt.title(datatitle)
plt.xlabel("Timestamp")
plt.ylabel("Price")
plt.show()

plt.figure(num=None, figsize=(12.5, 7), dpi=80, facecolor='w', edgecolor='k')
plt.plot(source['Date'], np.log(source['Close']))
plt.title(datatitle + ' log transformed')
plt.xlabel("Timestamp")
matplotlib.use(arg, warn=False, force=True)  # set the backend
matplotlib.get_backend()

class matplotlib.RcParams(*args, **kwargs)  # class for storing parameters
    copy()
    find_all(pattern)
    validate  # dictionary of validation functions

matplotlib.rcParams  # current parameters

matplotlib.rc_context(rc=None, fname=None)  # with rc_context(...): build the plot
matplotlib.rc(group, **kwargs)  # sets rc parameters
matplotlib.rc_file(fname)  # sets parameters from a file
matplotlib.rcdefaults()  # sets the built-in default parameters
matplotlib.rc_file_defaults()  # sets parameters from the default rc file
matplotlib.rc_params(fail_on_error=False)  # returns parameters from the default rc file
matplotlib.rc_params_from_file(fname, fail_on_error=False, use_default_template=True)
matplotlib.matplotlib_fname()  # path to the config file
matplotlib.interactive(b)  # sets interactive mode
matplotlib.is_interactive()  # checks interactive mode

#################### module style
import matplotlib.style
matplotlib.style.context(style, after_reset=False)
matplotlib.style.reload_library()
matplotlib.style.use(style)
matplotlib.style.library  # dictionary of available styles
matplotlib.style.available  # list of available styles
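# A short sketch of the reset/override API listed above; these are all
# standard matplotlib calls, no project-specific names involved.
import matplotlib
import matplotlib.pyplot as plt

# Override selected rc parameters globally...
matplotlib.rc('lines', linewidth=2.5)
matplotlib.rcParams['font.size'] = 12

# ...or only within a context; the settings revert on exit.
with matplotlib.rc_context({'axes.grid': True}):
    plt.plot([0, 1], [1, 0])
    plt.savefig('with_grid.png')

# Restore the settings from the user's matplotlibrc file...
matplotlib.rc_file_defaults()
# ...or fall back to matplotlib's compiled-in defaults.
matplotlib.rcdefaults()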
def main(
    n_jobs: int,
    batch_size: int,
    aht_max_number_of_nodes: int,
    alpha_coefficient: float,
    experiment_id: Union[str, int],
    overwrite_neighborhood: bool,
    filter_graphs_to_intersected_vertices: bool,
):
    filter_graphs_to_intersected_vertices = bool(filter_graphs_to_intersected_vertices)
    for dataset_path, max_reviews in tqdm(datasets, desc="Amazon datasets processing..."):
        for experiment_name in [
            experiment_name_enum.GERANI,
            experiment_name_enum.OUR_ALL_RULES,
            experiment_name_enum.OUR_TOP_1_RULES,
        ]:
            with mlflow.start_run(
                experiment_id=experiment_id,
                run_name=f"{experiment_name}-{dataset_path.stem}-{max_reviews}",
            ):
                mlflow.log_param("experiment_name", experiment_name)
                aspect_analysis = AspectAnalysis(
                    input_path=dataset_path.as_posix(),
                    output_path=settings.DEFAULT_OUTPUT_PATH / dataset_path.stem,
                    experiment_name=experiment_name,
                    jobs=n_jobs,
                    batch_size=batch_size,
                    max_docs=max_reviews,
                    aht_max_number_of_nodes=aht_max_number_of_nodes,
                    alpha_coefficient=alpha_coefficient,
                )

                if experiment_name == experiment_name_enum.OUR_ALL_RULES:
                    aspect_analysis.our_pipeline()
                elif experiment_name == experiment_name_enum.GERANI:
                    aspect_analysis.gerani_pipeline()
                elif experiment_name == experiment_name_enum.OUR_TOP_1_RULES:
                    aspect_analysis.our_pipeline_top_n_rules_per_discourse_tree()
                else:
                    raise Exception("Wrong experiment type")

                for conceptnet_graph_path in tqdm(CONCEPTNET_GRAPH_TOOL_GRAPHS,
                                                  desc="Conceptnet graph analysis..."):
                    with mlflow.start_run(
                        experiment_id=experiment_id,
                        run_name=conceptnet_graph_path.stem,
                        nested=True,
                        # run_id=f'{experiment_id}-{conceptnet_graph_path.stem}'
                    ) as run_conceptnet:
                        mlflow.log_param("dataset_path", dataset_path)
                        mlflow.log_param("dataset_name", dataset_path.stem)
                        mlflow.log_param("method", experiment_name)
                        mlflow.log_param("max_docs", max_reviews)
                        mlflow.log_param("batch_size", batch_size)
                        mlflow.log_param("n_jobs", n_jobs)
                        mlflow.log_param("conceptnet_graph_path", conceptnet_graph_path)
                        mlflow.log_param("conceptnet_graph_name", conceptnet_graph_path.stem)
                        mlflow.log_param("aht_max_number_of_nodes", aht_max_number_of_nodes)
                        mlflow.log_param("alpha_coefficient", alpha_coefficient)

                        png_file_path = (
                            aspect_analysis.paths.experiment_path /
                            f"shortest_paths_correlation_{conceptnet_graph_path.stem}.png"
                        )
                        if png_file_path.exists() and not overwrite_neighborhood:
                            logger.info(
                                f"{png_file_path.as_posix()} already exists, "
                                f"skipping to the next setting."
                            )
                            mlflow.log_artifact(png_file_path.as_posix())
                        else:
                            df = prepare_hierarchies_neighborhood(
                                experiments_path=aspect_analysis.paths,
                                conceptnet_graph_path=conceptnet_graph_path,
                                filter_graphs_to_intersected_vertices=filter_graphs_to_intersected_vertices,
                            )

                            logger.info(f"Shortest Paths pairs - data frame: {len(df)}")
                            df = df[~((df.shortest_distance_aspect_graph.isin(VALUES_TO_SKIP)) |
                                      (df.shortest_distance_conceptnet.isin(VALUES_TO_SKIP)))]
                            # drop_duplicates returns a new frame; assign it back
                            df = df.drop_duplicates(subset=["aspect_1", "aspect_2"])
                            mlflow.log_metric("number_of_shortest_paths", len(df))
                            logger.info(
                                f"Shortest Paths pairs - data frame, without no paths "
                                f"and duplicates: {len(df)}"
                            )

                            mlflow.log_dict(
                                pd.DataFrame(df.shortest_distance_aspect_graph
                                             .value_counts()).to_dict(orient="index"),
                                "shortest_distance_aspect_graph_distribution.json",
                            )
                            mlflow.log_dict(
                                pd.DataFrame(df.shortest_distance_conceptnet
                                             .value_counts()).to_dict(orient="index"),
                                "shortest_distance_conceptnet_distribution.json",
                            )

                            df = df[df.shortest_distance_aspect_graph <= 6]

                            matplotlib.rc_file_defaults()
                            sns.set_style(style=None, rc=None)  # set_style returns None
                            fig, ax1 = plt.subplots()
                            sns_plot = sns.lineplot(
                                x=df.shortest_distance_aspect_graph,
                                y=df.shortest_distance_conceptnet,
                                ax=ax1,
                            )
                            ax2 = ax1.twinx()

                            df_aspect_graph_distance_distribution = pd.DataFrame(
                                df.shortest_distance_aspect_graph.value_counts())
                            df_aspect_graph_distance_distribution.reset_index(inplace=True)
                            df_aspect_graph_distance_distribution.sort_values(
                                by="index", inplace=True)
                            sns.barplot(
                                x=df_aspect_graph_distance_distribution["index"],
                                y=df_aspect_graph_distance_distribution
                                .shortest_distance_aspect_graph,
                                alpha=0.5,
                                ax=ax2,
                            )

                            logger.info(
                                f"Shortest Paths correlation figure will be saved "
                                f"in {png_file_path}"
                            )
                            df.sort_values(by="shortest_distance_conceptnet", inplace=True)

                            pearson_correlation = df.shortest_distance_aspect_graph.corr(
                                df.shortest_distance_conceptnet)
                            spearman_correlation = df.shortest_distance_aspect_graph.corr(
                                df.shortest_distance_conceptnet, method="spearman")
                            kendall_correlation = df.shortest_distance_aspect_graph.corr(
                                df.shortest_distance_conceptnet, method="kendall")

                            df_csv_path = aspect_analysis.paths.experiment_path / "df.csv"
                            df.to_csv(df_csv_path.as_posix())
                            mlflow.log_artifact(df_csv_path.as_posix())

                            mlflow.log_metrics({
                                "pearson": pearson_correlation,
                                "spearman": spearman_correlation,
                                "kendall": kendall_correlation,
                            })

                            sns_plot.figure.savefig(png_file_path.as_posix())
                            plt.close()
                            mlflow.log_artifact(png_file_path.as_posix())
from __future__ import division

import numpy as np

import matplotlib
matplotlib.use("Agg", warn=False)
matplotlib.rc_file_defaults()

from matplotlib.figure import Figure
from mpl_toolkits.basemap import Basemap

import Utilities.colours
from Utilities.smooth import smooth


class MapFigure(Figure, Basemap):

    def __init__(self):
        Figure.__init__(self)
        self.subfigures = []

    def add(self, data, xgrid, ygrid, title, levels, cbarlab, map_kwargs):
        self.subfigures.append((data, xgrid, ygrid, title, levels, cbarlab, map_kwargs))

    def labelAxes(self, axes, xlabel='Longitude', ylabel='Latitude'):
        axes.set_xlabel(xlabel, labelpad=20, fontsize='x-small')
        axes.set_ylabel(ylabel, labelpad=25, fontsize='x-small')

    def addGraticule(self, axes, mapobj, dl=10.):
        xmin = mapobj.llcrnrlon
def _reset_sns():
    sns.reset_orig()
    mpl.rc_file_defaults()
    mpl.rcParams['figure.dpi'] = 300
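# A usage sketch for `_reset_sns` above; the plotting calls are illustrative.
# Apply seaborn theming for one figure, then restore plain matplotlib output
# (sns.set_theme is the modern spelling of sns.set).
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

sns.set_theme(style="darkgrid")   # seaborn overwrites many rcParams
plt.plot([0, 1, 2], [0, 1, 4])
plt.savefig("seaborn_styled.png")

_reset_sns()                      # back to matplotlibrc defaults, dpi pinned to 300
plt.plot([0, 1, 2], [0, 1, 4])
plt.savefig("plain_matplotlib.png")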
def render_figures(code, code_path, output_dir, output_base, config, context=True,
                   function_name=None, context_reset=False, close_figs=False,
                   raises=None):
    """ Run plot code and save the hi/low res PNGs and PDF in `output_dir`

    Save the images under `output_dir` with file names derived from `output_base`.

    Parameters
    ----------
    code : str
        String containing code to run.
    code_path : str
        Path of file containing code. Usually path to ``.rst`` file.
    output_dir : str
        Path to which to write output images from plots.
    output_base : str
        Prefix for filename(s) for output image(s).
    config : instance
        Sphinx configuration instance.
    context : {True, False}, optional
        If True, use persistent context (workspace) for executing code. Otherwise
        create new empty context for executing code.
    function_name : None or str, optional
        If non-empty str, name of function to execute after executing `code`.
    context_reset : {False, True}, optional
        If True, clear persistent context (workspace) for code.
    close_figs : {False, True}, optional
        If True, close all figures generated before our `code` runs. False can be
        useful when building up a plot with several `code` blocks.
    raises : None or Exception, optional
        Exception class that code should raise, or None, for no exception.
    """
    # -- Parse format list
    default_dpi = {'png': 80, 'hires.png': 200, 'pdf': 200}
    formats = []
    plot_formats = config.nbplot_formats
    if isinstance(plot_formats, six.string_types):
        # String for Sphinx < 1.3; split on , to mimic Sphinx 1.3 and later,
        # which always returns a list.
        plot_formats = plot_formats.split(',')
    for fmt in plot_formats:
        if isinstance(fmt, six.string_types):
            if ':' in fmt:
                suffix, dpi = fmt.split(':')
                formats.append((str(suffix), int(dpi)))
            else:
                formats.append((fmt, default_dpi.get(fmt, 80)))
        elif type(fmt) in (tuple, list) and len(fmt) == 2:
            formats.append((str(fmt[0]), int(fmt[1])))
        else:
            raise PlotError('invalid image format "%r" in nbplot_formats' % fmt)

    # Build the output
    ns = plot_context if context else {}

    if context_reset:
        plt.close('all')
        matplotlib.rc_file_defaults()
        matplotlib.rcParams.update(config.nbplot_rcparams)
        plot_context.clear()

    close_figs = not context or close_figs

    # Get working directory for code execution
    if setup.config.nbplot_working_directory is not None:
        workdir = _check_wd(setup.config.nbplot_working_directory)
    elif code_path is not None:
        workdir = abspath(dirname(code_path))
    else:
        workdir = None

    if close_figs:
        plt.close('all')

    run_code(code, code_path, ns, function_name, workdir=workdir,
             pre_code=setup.config.nbplot_pre_code, raises=raises)

    images = []
    fig_managers = Gcf.get_all_fig_managers()
    for j, figman in enumerate(fig_managers):
        if len(fig_managers) == 1:
            img = ImageFile(output_base, output_dir)
        else:
            img = ImageFile("%s_%02d" % (output_base, j), output_dir)
        images.append(img)
        for format, dpi in formats:
            try:
                figman.canvas.figure.savefig(img.filename(format), dpi=dpi)
            except Exception:
                raise PlotError(traceback.format_exc())
            img.formats.append(format)

    return images
def clear_state():
    plt.close('all')
    matplotlib.rc_file_defaults()
def set_rc_params(matplotlib_options):
    # Reset options from rc file.
    matplotlib.rc_file_defaults()
    if matplotlib_options:
        matplotlib.rcParams.update(matplotlib_options)
def set_rc_params(matplotlib_options):
    # Reset options from rc file if matplotlib installation supports it.
    if hasattr(matplotlib, 'rc_file_defaults'):
        matplotlib.rc_file_defaults()
    if matplotlib_options:
        matplotlib.rcParams.update(matplotlib_options)
def clear_state(plot_rcparams, close=True):
    if close:
        plt.close('all')
    matplotlib.rc_file_defaults()
    matplotlib.rcParams.update(plot_rcparams)
fig = plt.figure()
ax_w = fig.add_subplot(111)  # The big subplot
# ax_w.set_aspect('equal', adjustable='box')
plt.sca(ax_w)
sns.set(font_scale=0.5)
sns.heatmap(heatmap_wavelets, cbar_kws={'label': 'Energy'}, ax=ax_w, cmap='plasma')
ax_w.set_xlim(0, len(chunk_sequence))
plt.xticks(fontsize=3)
plt.yticks(fontsize=3)
# ax_w.set_title(description, fontweight='bold', fontsize=4)
fig.tight_layout()
output_png = '{}_{}_wavelet_type_{}.png'.format(output, k, code_types[m])
plt.savefig(output_png, dpi=2400, bbox_inches='tight')
fig.clear()
plt.close(fig)
matplotlib.rc_file_defaults()
print(datetime.datetime.now())
def clear_state(plot_rcparams):
    plt.close('all')
    matplotlib.rc_file_defaults()
    matplotlib.rcParams.update(plot_rcparams)
def clear_state(plot_rcparams):
    plt.close('all')
    if hasattr(matplotlib, 'rc_file_defaults'):
        matplotlib.rc_file_defaults()
    matplotlib.rcParams.update(plot_rcparams)