Code example #1
File: main.py  Project: prob-ml/bliss
def main(cfg):
    figs, device, bfig_kwargs = setup(cfg)
    encoder, decoder = load_models(cfg, device)
    frame, photo_cat = load_sdss_data(cfg)

    # FIGURE 1: Autoencoder single galaxy reconstruction
    if 1 in figs:
        make_autoencoder_figure(cfg, bfig_kwargs, device)

    # FIGURE 2: Classification and Detection metrics
    if 2 in figs:
        print(
            "INFO: Creating classification and detection metrics from SDSS frame figures..."
        )
        dc_fig = DetectionClassificationFigures(**bfig_kwargs)
        dc_fig.save_figures(frame, photo_cat, encoder, decoder)
        mpl.rc_file_defaults()

    # FIGURE 3: Reconstructions on SDSS
    if 3 in figs:
        print("INFO: Creating reconstructions from SDSS figures...")
        sdss_rec_fig = SDSSReconstructionFigures(cfg.plots.scenes,
                                                 **bfig_kwargs)
        sdss_rec_fig.save_figures(frame, encoder, decoder)
        mpl.rc_file_defaults()

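    # FIGURE 4: Blend simulation figures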
    if 4 in figs:
        make_blend_sim_figure(cfg, encoder, decoder, bfig_kwargs)

    if not figs.intersection({1, 2, 3, 4}):
        raise NotImplementedError(
            "No figures were created, `cfg.plots.figs` should be a subset of [1,2,3,4]."
        )
Code example #2
File: make_vel_width.py  Project: sbird/vw_spectra
def plot_den(sim, snap, num, subdir="", voff=0, box=10, elem="Si", ion=2):
    """Plot density"""
    hspec = get_hspec(sim, snap, snr=20., box=box)
    #Adjust the default plot parameters, which do not scale well in a gridspec.
    matplotlib.rc('xtick', labelsize=10)
    matplotlib.rc('ytick', labelsize=10)
    matplotlib.rc('axes', labelsize=10)
    matplotlib.rc('font', size=8)
    matplotlib.rc('lines', linewidth=1.5)
    gs = gridspec.GridSpec(9,2)
    ax3 = plt.subplot(gs[0:4,0])
    plt.sca(ax3)
    xoff = hspec.plot_spectrum(elem,ion,-1,num, flux=False)
    xlim = plt.xlim()
    ax3.xaxis.set_label_position('top')
    ax3.xaxis.tick_top()
    voff += xoff
    ax2 = plt.subplot(gs[5:,0])
    plt.sca(ax2)
    dxlim = hspec.plot_density(elem,ion, num)
    plt.ylabel(r"n$_\mathrm{"+elem+"II}$ (cm$^{-3}$)")
    plt.ylim(ymin=1e-9)
    ax1 = plt.subplot(gs[4,0])
    plt.sca(ax1)
    xscale = dxlim*hspec.velfac/xlim[1]
    hspec.plot_den_to_tau(elem, ion, num, thresh = 1e-9, xlim=200,voff=voff, xscale=xscale)
    ax1.axes.get_xaxis().set_visible(False)
    plt.xlabel("")
    plt.xlim(xlim)
    sdir = path.join(outdir,"spectra/"+subdir)
    if not path.exists(sdir):
        os.mkdir(sdir)
    save_figure(path.join(sdir,str(num)+"_cosmo"+str(sim)+"_"+elem+"_colden"))
    plt.clf()
    matplotlib.rc_file_defaults()
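
The row-span layout that GridSpec builds above, isolated into a minimal sketch (matplotlib is the only dependency; the 9x2 grid mirrors the example):

import matplotlib.pyplot as plt
from matplotlib import gridspec

fig = plt.figure()
gs = gridspec.GridSpec(9, 2)
ax_top = fig.add_subplot(gs[0:4, 0])    # spectrum panel: rows 0-3 of column 0
ax_mid = fig.add_subplot(gs[4, 0])      # thin strip: row 4 only
ax_bottom = fig.add_subplot(gs[5:, 0])  # density panel: rows 5-8
plt.show()
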
Code example #3
File: pyplots.py  Project: ChillarAnand/plugins
    def run(self):
        if matplotlib is None:
            msg = req_missing(['matplotlib'], 'use the plot directive', optional=True)
            return [nodes.raw('', '<div class="text-error">{0}</div>'.format(msg), format='html')]

        if not self.arguments and not self.content:
            raise self.error('The plot directive needs either an argument or content.')

        if self.arguments and self.content:
            raise self.error('The plot directive needs either an argument or content, not both.')

        if self.arguments:
            plot_path = self.arguments[0]
            with io.open(plot_path, encoding='utf-8') as fd:
                data = fd.read()
        elif self.content:
            data = '\n'.join(self.content)
            plot_path = md5(data.encode('utf-8')).hexdigest()  # md5() requires bytes in Python 3

        # Always reset context
        plt.close('all')
        matplotlib.rc_file_defaults()
        # Run plot
        exec(data)

        out_path = os.path.join(self.out_dir, plot_path + '.svg')
        plot_url = '/' + os.path.join('pyplots', plot_path + '.svg').replace(os.sep, '/')

        figures = [manager.canvas.figure for manager in matplotlib._pylab_helpers.Gcf.get_all_fig_managers()]
        for figure in figures:
            makedirs(os.path.dirname(out_path))
            figure.savefig(out_path, format='svg')  # Yes, if there's more than one, it's overwritten, sucks.
        self.arguments = [plot_url]
        return super(PyPlot, self).run()
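
The content-hash naming used above, in isolation (hashlib only; note that in Python 3 md5() needs bytes, hence the encode):

from hashlib import md5

data = "plt.plot([1, 2, 3])"
plot_path = md5(data.encode('utf-8')).hexdigest()  # stable filename derived from the source text
print(plot_path + '.svg')
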
Code example #4
File: rnadiff.py  Project: sequana/sequana
    def __init__(self,
                 folder,
                 design,
                 gff,
                 output_filename="rnadiff.html",
                 **kwargs):
        """.. rubric:: constructor

        """
        super().__init__()
        self.title = "RNAdiff"
        self.independent_module = True
        self.module_command = "--module rnadiff"

        from sequana.rnadiff import RNADiffResults

        self.rnadiff = RNADiffResults(folder, design, gff=gff, **kwargs)

        # nice layout for the report
        import seaborn
        seaborn.set()

        self.create_main_report_content()
        self.create_individual_reports()

        self.create_html(output_filename)
        import matplotlib
        matplotlib.rc_file_defaults()
Code example #5
def back2future():
    """
    Activate matplotlib settings from the default matplotlibrc file.
    """
    print("Activating settings from", mpl.matplotlib_fname())
    mpl.rc_file_defaults()
    mpl.rcParams["axes.titlesize"] = "medium"
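
A minimal round-trip showing what rc_file_defaults() undoes (a sketch assuming only matplotlib and a recent seaborn are installed):

import matplotlib as mpl
import seaborn as sns

sns.set_theme()                        # seaborn overrides many rcParams globally
print(mpl.rcParams['axes.facecolor'])  # seaborn's light grey, e.g. '#EAEAF2'

mpl.rc_file_defaults()                 # re-read the user's matplotlibrc
print(mpl.rcParams['axes.facecolor'])  # back to the matplotlibrc value, e.g. 'white'
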
Code example #6
def plot_correlation_matrix(features, image_save_directory, total_values):
    # Select column values to use in the correlation plot
    feature_plot = list(range(0, 10, 1))
    # Select outcomes to show
    feature_plot.extend([-4, -3, -2, -1])
    print(feature_plot)
    print(total_values.columns[feature_plot])
    # http://benalexkeen.com/correlation-in-python/
    # https://stackoverflow.com/questions/26975089/making-the-labels-of-the-scatterplot-vertical-and-horizontal-in-pandas

    #Check if the matrix is singular
    if np.linalg.cond(
            total_values.iloc[:, feature_plot]) < 1 / sys.float_info.epsilon:
        m.rc_file_defaults()  # Reset sns
        axs = pd.plotting.scatter_matrix(total_values.iloc[:, feature_plot],
                                         figsize=(15, 15),
                                         alpha=0.2,
                                         diagonal='kde')
        n = len(features.iloc[:, feature_plot].columns)
        for i in range(n):
            for j in range(n):
                # to get the axis of subplots
                ax = axs[i, j]
                # to make x axis name vertical
                ax.xaxis.label.set_rotation(90)
                # to make y axis name horizontal
                ax.yaxis.label.set_rotation(0)
                # to make sure y axis names are outside the plot area
                ax.yaxis.labelpad = 50
        # plt.yticks(rotation=90)
        vis.save_figure(plt.gcf(),
                        image_save_directory=image_save_directory,
                        filename="Scatter-Matrix")
    else:
        warnings.warn("Input matrix is singular; the scatter matrix cannot be computed.")
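
The condition-number test used above, in isolation (NumPy only; the 1/epsilon threshold matches the example):

import sys
import numpy as np

thresh = 1 / sys.float_info.epsilon
well_conditioned = np.eye(3)
singular = np.ones((2, 2))  # rank 1, so its condition number is inf

print(np.linalg.cond(well_conditioned) < thresh)  # True  -> safe to plot
print(np.linalg.cond(singular) < thresh)          # False -> warn instead
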
Code example #7
File: main.py  Project: prob-ml/bliss
def make_autoencoder_figure(cfg, bfig_kwargs, device):
    print("INFO: Creating autoencoder figures...")
    autoencoder = instantiate(cfg.models.galaxy_net)
    autoencoder.load_state_dict(
        torch.load(cfg.models.prior.galaxy_prior.autoencoder_ckpt))
    autoencoder = autoencoder.to(device).eval()

    # generate galsim simulated galaxies images if file does not exist.
    galaxies_file = Path(cfg.plots.simulated_sdss_individual_galaxies)
    if not galaxies_file.exists() or cfg.plots.overwrite:
        print(
            f"INFO: Generating individual galaxy images and saving to: {galaxies_file}"
        )
        dataset = instantiate(cfg.datasets.sdss_galaxies,
                              batch_size=512,
                              n_batches=20,
                              num_workers=20)
        imagepath = galaxies_file.parent / (galaxies_file.stem + "_images.png")
        generate.generate(dataset,
                          galaxies_file,
                          imagepath,
                          n_plots=25,
                          global_params=("background", "slen"))

    # create figure classes and plot.
    ae_figures = AEReconstructionFigures(n_examples=5, **bfig_kwargs)
    ae_figures.save_figures(autoencoder, galaxies_file, cfg.plots.psf_file,
                            cfg.plots.sdss_pixel_scale)
    mpl.rc_file_defaults()
Code example #8
File: config.py  Project: mantidproject/mantid
def reset_rcparams_to_default():
    """
    Reset the rcParams to the default settings.
    """
    mpl.rcParams.clear()
    mpl.rc_file_defaults()
    set_rcparams(DEFAULT_RCPARAMS)
    # We must keep our backend
    mpl.use(MPL_BACKEND)
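
DEFAULT_RCPARAMS, set_rcparams and MPL_BACKEND are project internals; a self-contained sketch of the same keep-the-backend pattern might look like this:

import matplotlib as mpl

def reset_rcparams_keep_backend():
    backend = mpl.get_backend()  # rcParams.clear() wipes the 'backend' entry too
    mpl.rcParams.clear()
    mpl.rc_file_defaults()       # reload the defaults from the matplotlibrc file
    mpl.use(backend)             # restore the backend we started with
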
Code example #9
def plot_pca(X_scaled, class_labels, image_save_directory, y):

    m.rc_file_defaults()  # Reset sns
    pca_trafo = PCA().fit(X_scaled)
    pca_values = pca_trafo.transform(X_scaled)
    # from adjustText import adjust_text
    targets = np.array(y).flatten()
    fig, ax1 = plt.subplots(figsize=(10, 8))
    plt.semilogy(pca_trafo.explained_variance_ratio_, '--o')
    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
    plt.semilogy(pca_trafo.explained_variance_ratio_.cumsum(),
                 '--o',
                 color='green')
    plt.xlabel("Principal Component")
    plt.ylabel("Explained variance")
    plt.xticks(np.arange(0, len(pca_trafo.explained_variance_ratio_)))
    plt.hlines(0.95,
               0,
               len(pca_trafo.explained_variance_ratio_.cumsum()),
               colors='red',
               linestyles='solid',
               label='95% variance covered')

    vis.save_figure(plt.gcf(),
                    image_save_directory=image_save_directory,
                    filename='PCA_Variance_Coverage')

    fig = plt.figure()
    sns.heatmap(np.log(pca_trafo.inverse_transform(np.eye(X_scaled.shape[1]))),
                cmap="hot",
                cbar=True)
    necessary_components = pca_trafo.explained_variance_ratio_.cumsum()[
        pca_trafo.explained_variance_ratio_.cumsum() < 0.95]
    print(
        "95% variance covered with the {} first components. Values={}".format(
            len(necessary_components), necessary_components))

    vis.save_figure(plt.gcf(),
                    image_save_directory=image_save_directory,
                    filename='PCA_Heatmap')

    plt.figure(figsize=(10, 10))
    # plt.scatter(pca_values[:,0], pca_values[:,1], c=targets, edgecolor='none', label=class_labels.values(), alpha=0.5)
    for i, t in enumerate(set(targets)):
        idx = targets == t
        plt.scatter(pca_values[idx, 0],
                    pca_values[idx, 1],
                    label=class_labels[t],
                    edgecolor='none',
                    alpha=0.5)
    plt.legend(labels=class_labels.values(), bbox_to_anchor=(1, 1))
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')

    vis.save_figure(plt.gcf(),
                    image_save_directory=image_save_directory,
                    filename='PCA_Plot')
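
The 95%-coverage bookkeeping above, reduced to its core (a sketch; only scikit-learn and NumPy are assumed):

import numpy as np
from sklearn.decomposition import PCA

X = np.random.default_rng(0).normal(size=(200, 10))
ratios = PCA().fit(X).explained_variance_ratio_
# index of the first component whose cumulative ratio reaches 0.95, plus one
n_components = int(np.searchsorted(ratios.cumsum(), 0.95)) + 1
print(f"{n_components} components cover 95% of the variance")
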
Code example #10
File: t32.py  Project: cppjocker/resonanse_dna
def test_visual():
    for k in range(0, 3):
        fig = plt.figure()
        ax = fig.add_subplot(111)  # The big subplot
        ax.set_aspect('equal', adjustable='box')

        plt.sca(ax)

        pairs_all = pd.DataFrame({'x': pd.Series(np.zeros(500), dtype=int), 'y': pd.Series(np.zeros(500), dtype=int)})
        sns.scatterplot(data=pairs_all, x="x", y="y", ax=ax, s=0.15)
        # plt.legend([], [], frameon=False)

        ax.legend(loc='upper left', markerscale=0.2, bbox_to_anchor=(1.04, 1), fontsize=2)
        ax.set_xlim(0, 100)
        ax.set_ylim(0, 100)

        # ax.set_xticklabels(ax.get_xticklabels(), fontsize = 4)
        # ax.set_yticklabels(ax.get_yticklabels(), fontsize = 4)

        plt.xticks(np.arange(0, 100 + 1, 5), fontsize=2 )
        plt.yticks(np.arange(0, 100 + 1, 5), fontsize=2 )

        ax.set_xlabel("")
        ax.set_ylabel("")

        fig.tight_layout()

        output_png = '{}_{}.png'.format('file1', k)
        plt.savefig(output_png, dpi=2400, bbox_inches='tight')
        fig.clear()
        plt.close(fig)

        fig = plt.figure()
        ax_w = fig.add_subplot(111)  # The big subplot
        # ax_w.set_aspect('equal', adjustable='box')

        plt.sca(ax_w)

        sns.set(font_scale=0.5)
        sns.heatmap(np.zeros((100, 100)), cbar_kws={'label': 'Energy'}, ax=ax_w, cmap='plasma')

        ax_w.set_xlim(0, 100)
        plt.xticks(fontsize=3)
        plt.yticks(fontsize=3)

        # ax_w.set_title(description, fontweight='bold', fontsize=4)

        fig.tight_layout()

        output_png = '{}_{}.png'.format('file2', k)
        plt.savefig(output_png, dpi=2400, bbox_inches='tight')
        fig.clear()
        plt.close(fig)

        matplotlib.rc_file_defaults()
Code example #11
def plot_correlation_bar(X_scaled, conf, image_save_directory, y_scaled):
    m.rc_file_defaults()  # Reset sns
    corr = X_scaled.corrwith(y_scaled[conf['Common'].get('class_name')],
                             axis=0)
    corr.sort_values().plot.barh(color='blue',
                                 title='Strength of Correlation',
                                 figsize=(10, 25))
    print(corr)

    vis.save_figure(plt.gcf(),
                    image_save_directory=image_save_directory,
                    filename='Correlation_Strength')
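
corrwith pairs each feature column with the outcome series; in isolation (pandas only):

import pandas as pd

X = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [4, 3, 2, 1]})
y = pd.Series([1.0, 2.0, 3.0, 4.0])
print(X.corrwith(y, axis=0))  # a: 1.0 (perfectly aligned), b: -1.0 (inverted)
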
Code example #12
def cycle_run_simulation(mat_values, mat_pos, number_of_run,
                         from_zero_to_interval, from_zero_to_noise):
    global dir_res_automatic_sim, mat_rows
    dir_name = dir_res_automatic_sim + str(calendar.timegm(
        time.gmtime())) + '/'
    check_dir_res(dir_name)

    for index_interval in [1, 4, 8, 12]:
        save_dir = dir_name + 'Interval #' + str(index_interval) + '/'
        res_single_interval = pd.DataFrame(columns=['Noise', 'AVG', 'Total'])
        index_noise = 1
        mat_pos2 = deepcopy(mat_pos)
        mat_for_position = mat_pos2[:, 0:index_interval]
        list_mat_for_sim = []
        for i in range(0, mat_rows):
            for j in range(0, index_interval):
                list_mat_for_sim.append(mat_for_position[i, j])
        for index_noise in range(1, from_zero_to_noise + 1):
            if index_noise >= mat_rows * index_interval:
                break
            else:
                mat_values2 = deepcopy(mat_values)
                mat_for_simulation = mat_values2[:, 0:index_interval]
                avg_real_values, list_avg_mat_modified, result = run_simulation(
                    mat_for_simulation, list_mat_for_sim, number_of_run,
                    index_interval, index_noise)
                res_single_interval = print_single_result(
                    mat_rows * index_interval, index_noise, avg_real_values,
                    list_avg_mat_modified, result,
                    save_dir + 'N(' + str(index_noise) + ')/', True,
                    res_single_interval)
        res_single_interval['Real_AVG'] = avg_real_values
        sns.boxplot(data=res_single_interval, x='Total', y='AVG')
        plt.savefig(save_dir + f'Interval {index_interval} - Total.pdf')
        plt.close()
        del res_single_interval['Total']
        res_single_interval.to_csv(save_dir + f'Interval {index_interval}.csv')
        sns.lineplot(x='Noise',
                     y='value',
                     hue='variable',
                     data=pd.melt(res_single_interval, ['Noise']))
        plt.savefig(save_dir + f'Interval {index_interval} (Line).pdf')
        plt.close()
        plt.rcParams['figure.figsize'][0] = round(index_noise / 2)
        sns.boxplot(data=res_single_interval, x="Noise", y="AVG")
        plt.savefig(save_dir + f'Interval {index_interval} (BoxPlot).pdf')
        plt.close()
        mpl.rc_file_defaults()
Code example #13
def plot_t_sne(X_scaled_subset, y_scaled_subset, class_labels,
               image_save_directory):
    ### Visualize Data with t-SNE
    # Select a random subset to visualize
    import random
    # Reduce the training set with the number of samples randomly chosen
    # X_train_index_subset = sup.get_data_subset_index(1000, X_scaled)
    np.random.seed(0)
    # X_embedded = TSNE(n_components=2, perplexity=5.0, early_exaggeration=12.0, n_iter=5000,
    #                  n_iter_without_progress=1000, learning_rate=10).fit_transform(embedded)
    X_embedded = TSNE(n_components=2,
                      perplexity=10.0,
                      early_exaggeration=100.0,
                      n_iter=5000,
                      n_iter_without_progress=1000,
                      learning_rate=10).fit_transform(X_scaled_subset)
    #### Plot t-SNE with best parameters
    m.rc_file_defaults()  # Reset sns
    # Plot with texts added to the graphs
    # from adjustText import adjust_text
    #targets = np.array(y[X_train_index_subset]).flatten()
    plt.figure(figsize=(10, 10))
    texts = []

    if y_scaled_subset is not None and class_labels is not None:
        print("Plot t-sne with known classes")
        for i, t in enumerate(set(y_scaled_subset)):
            idx = y_scaled_subset == t
            # for x, y in zip(X_embedded[idx, 0], X_embedded[idx, 1]):
            # texts.append(plt.text(x, y, t))
            plt.scatter(X_embedded[idx, 0],
                        X_embedded[idx, 1],
                        label=class_labels[t])
        # adjust_text(texts, force_points=0.2, force_text=0.2, expand_points=(1,1), expand_text=(1,1), arrowprops=dict(arrowstyle="-", color='black', lw=0.5))
        plt.legend(bbox_to_anchor=(1, 1))
    else:
        print("Plot t-sne without known classes")
        plt.scatter(X_embedded[:, 0], X_embedded[:, 1])

    vis.save_figure(plt.gcf(),
                    image_save_directory=image_save_directory,
                    filename='T-SNE_Plot')
Code example #14
def print_pretty_spectra(snapnum, simname):
    """Print pretty versions of spectra from a simulation snapshot"""
    rands = np.random.randint(0,1000,20)
    halo = ps.CIVPlot(snapnum, simname, savefile="nr_dla_spectra.hdf5", spec_res = 50.)
    offsets = halo.get_offsets()[rands]
    np.savetxt("tau_Rperp_table.txt", np.sort(np.vstack((np.arange(0,1000), halo.get_offsets())).T,0))
    for nn in rands:
        gs = gridspec.GridSpec(9,2)
        axes = (plt.subplot(gs[0:4,0]), plt.subplot(gs[5:,0]), plt.subplot(gs[4,0]))
        #Adjust the default plot parameters, which do not scale well in a gridspec.
        matplotlib.rc('xtick', labelsize=8)
        matplotlib.rc('ytick', labelsize=8)
        matplotlib.rc('axes', labelsize=8)
        matplotlib.rc('font', size=6)
        matplotlib.rc('lines', linewidth=1.5)
        plot_den(halo, axes, nn+1000, color="red")
        plot_den(halo, axes, nn)
        np.savetxt(str(nn)+"_tau_DLA.txt",halo.get_tau("C",4,1548,nn))
        np.savetxt(str(nn)+"_tau_CGM.txt",halo.get_tau("C",4,1548,nn+1000))
        axes[0].text(-500, 0.2,"offset (prop kpc): "+str(offsets[nn]*0.33333/0.7))
        odir = path.join(outdir, "spectra")
        save_figure(path.join(odir,str(nn)+"_cosmo"+"_CIV_spec"))
        plt.clf()
        matplotlib.rc_file_defaults()
Code example #15
def run_colocation(
        sp_data,
        n_neighbours=None,
        model_name='CoLocatedGroupsSklearnNMF',
        verbose=True,
        return_all=True,
        train_args={
            'n_fact': [30],
            'n_iter': 20000,
            'sample_name_col': None,
            'mode': 'normal',
            'n_type': 'restart',
            'n_restarts': 5
        },
        model_kwargs={
            'init': 'random',
            'random_state': 0,
            'nmf_kwd_args': {
                'tol': 0.00001
            }
        },
        posterior_args={},
        export_args={
            'path': "./results",
            'run_name_suffix': '',
            'top_n': 10
        }):
    r"""Run co-located cell type combination model: train for the specified
    numbers of factors, evaluate the stability, save and export results,
    and save diagnostic plots.

    Parameters
    ----------
    sp_data:
        Anndata object with cell2location model output in .uns['mod'].
        Alternatively np.ndarray with cell density parameters.
        Alternatively pd.DataFrame with cell density parameters (with
        location and cell type names).

    Returns
    -------
    dict
        dictionary {'mod', 'sc_data', 'model_name', 'train_args',
        'posterior_args', 'export_args', 'run_name', 'run_time'}
    """

    # set default parameters

    d_train_args = {
        'n_fact': [30],
        'n_iter': 20000,
        'learning_rate': 0.01,
        'use_cuda': False,
        'sample_prior': False,
        'posterior_summary': 'post_sample_q05',
        'sample_name_col': None,
        'mode': 'normal',
        'n_type': 'restart',
        'n_restarts': 5,
        'include_source_location': False
    }

    d_posterior_args = {
        'n_samples': 1000,
        'evaluate_stability_align': False,
        'evaluate_stability_transpose': True,
        'mean_field_slot': "init_1"
    }

    d_export_args = {
        'path': "./results",
        'plot_extension': "pdf",
        'scanpy_coords_name': 'spatial',
        'scanpy_plot_vmax': 'p99.2',
        'scanpy_plot_size': 1.3,
        'save_model': True,
        'run_name_suffix': '',
        'export_q05': False,
        'plot_histology': False,
        'top_n': 10
    }

    # replace defaults with parameters supplied
    for k in train_args.keys():
        d_train_args[k] = train_args[k]
    train_args = d_train_args
    for k in posterior_args.keys():
        d_posterior_args[k] = posterior_args[k]
    posterior_args = d_posterior_args
    for k in export_args.keys():
        d_export_args[k] = export_args[k]
    export_args = d_export_args

    # start timing
    start = time.time()

    sp_data = sp_data.copy()

    # import the specified version of the model
    if type(model_name) is str:
        import cell2location.models as models
        Model = getattr(models, model_name)
    else:
        Model = model_name

    ####### Preparing data #######
    # extract cell density parameter
    X_data = sp_data.uns['mod'][
        train_args['posterior_summary']]['spot_factors']
    var_names = sp_data.uns['mod']['fact_names']
    obs_names = sp_data.obs_names
    if train_args['sample_name_col'] is None:
        # if slots needed to generate scanpy plots are present, use scanpy spatial slot name:
        sc_spatial_present = np.any(np.isin(list(sp_data.uns.keys()), ['spatial']))  # same check as used later for plotting
        if sc_spatial_present:
            sp_data.obs['sample'] = list(sp_data.uns['spatial'].keys())[0]
        else:
            sp_data.obs['sample'] = 'sample'

        train_args['sample_name_col'] = 'sample'

    sample_id = sp_data.obs[train_args['sample_name_col']]

    if n_neighbours is not None:
        neighbours = spatial_neighbours(
            coords=sp_data.obsm['spatial'],
            n_sp_neighbors=n_neighbours,
            radius=None,
            include_source_location=train_args['include_source_location'],
            sample_id=sample_id)
        neighbours_sum = sum_neighbours(X_data, neighbours)
        X_data = np.concatenate([X_data, neighbours_sum], axis=1)
        var_names = list(var_names) + ['neigh_' + i for i in var_names]

    res_dict = {}

    for n_fact in train_args['n_fact']:
        ####### Creating model #######
        if verbose:
            print('### Creating model ### - time ' +
                  str(np.around((time.time() - start) / 60, 2)) + ' min')

        # create model class
        n_fact = int(n_fact)
        mod = Model(n_fact,
                    X_data,
                    n_iter=train_args['n_iter'],
                    verbose=verbose,
                    var_names=var_names,
                    obs_names=obs_names,
                    fact_names=['fact_' + str(i) for i in range(n_fact)],
                    sample_id=sample_id,
                    **model_kwargs)

        ####### Print run name #######
        run_name = str(mod.__class__.__name__) + '_' + str(mod.n_fact) + 'combinations_' \
                   + str(mod.n_obs) + 'locations_' + str(mod.n_var) + 'factors' \
                   + export_args['run_name_suffix']
        path_name = str(mod.__class__.__name__) + '_' \
                    + str(mod.n_obs) + 'locations_' + str(mod.n_var) + 'factors' \
                    + export_args['run_name_suffix']

        print('### Analysis name: ' +
              run_name)  # analysis name is always printed

        # create the export directory
        path = export_args['path'] + path_name + '/'
        if not os.path.exists(path):
            os.makedirs(os.path.abspath(path))

        ####### Sampling prior #######
        if train_args['sample_prior']:
            raise ValueError('Sampling prior not implemented yet')

        ####### Training model #######
        if verbose:
            print('### Training model###')
        if train_args['mode'] == 'normal':
            mod.fit(n=train_args['n_restarts'], n_type=train_args['n_type'])

        elif train_args['mode'] == 'tracking':
            raise ValueError('tracking training not implemented yet')
        else:
            raise ValueError(
                "train_args['mode'] can be only 'normal' or 'tracking'")

        ####### Evaluate stability of training #######
        fig_path = path + 'stability_plots/'
        if not os.path.exists(fig_path):
            mkdir(fig_path)
        if train_args['n_restarts'] > 1:
            n_plots = train_args['n_restarts'] - 1
            ncol = int(np.min((n_plots, 3)))
            nrow = np.ceil(n_plots / ncol)
            plt.figure(figsize=(5 * nrow, 5 * ncol))
            mod.evaluate_stability(
                'cell_type_factors',
                n_samples=posterior_args['n_samples'],
                align=posterior_args['evaluate_stability_align'])
            plt.tight_layout()
            save_plot(fig_path,
                      filename=f'cell_type_factors_n_fact{mod.n_fact}',
                      extension=export_args['plot_extension'])
            if verbose:
                plt.show()
            plt.close()

            plt.figure(figsize=(5 * nrow, 5 * ncol))
            mod.evaluate_stability(
                'location_factors',
                n_samples=posterior_args['n_samples'],
                align=posterior_args['evaluate_stability_align'])
            plt.tight_layout()
            save_plot(fig_path,
                      filename=f'location_factors_n_fact{mod.n_fact}',
                      extension=export_args['plot_extension'])
            if verbose:
                plt.show()
            plt.close()

        ####### Evaluating parameters / sampling posterior #######
        if verbose:
            print(
                f'### Evaluating parameters / sampling posterior ### - time {np.around((time.time() - start) / 60, 2)} min'
            )
        # extract all parameters from parameter store or sample posterior
        mod.sample_posterior(node='all',
                             n_samples=posterior_args['n_samples'],
                             save_samples=False,
                             mean_field_slot=posterior_args['mean_field_slot'])

        # evaluate predictive accuracy of the model
        mod.compute_expected()

        # Plot predictive accuracy
        fig_path = path + 'predictive_accuracy/'
        if not os.path.exists(fig_path):
            mkdir(fig_path)
        try:
            plt.figure(figsize=(5.5, 5.5))
            mod.plot_posterior_mu_vs_data()
            plt.tight_layout()
            save_plot(fig_path,
                      filename=f'data_vs_posterior_mean_n_fact{mod.n_fact}',
                      extension=export_args['plot_extension'])
            if verbose:
                plt.show()
            plt.close()
        except Exception as e:
            print(
                'Some error in plotting `mod.plot_posterior_mu_vs_data()`\n ' +
                str(e))

        ####### Export summarised posterior & Saving results #######
        if verbose:
            print('### Saving results ###')

        # extract parameters into DataFrames
        mod.sample2df(node_name='nUMI_factors',
                      ct_node_name='cell_type_factors')

        # export results to scanpy object
        sp_data = mod.annotate_adata(sp_data)  # as columns to .obs
        sp_data = mod.export2adata(
            sp_data,
            slot_name=f'mod_coloc_n_fact{mod.n_fact}')  # as a slot in .uns

        # print the fraction of cells of each type located to each combination
        ct_loadings = mod.print_gene_loadings(
            loadings_attr='cell_type_fractions',
            gene_fact_name='cell_type_fractions',
            top_n=export_args['top_n'])

        # save
        save_path = path + 'factor_markers/'
        if not os.path.exists(save_path):
            mkdir(save_path)
        ct_loadings.to_csv(f'{save_path}n_fact{mod.n_fact}.csv')

        save_path = path + 'location_factors_mean/'
        if not os.path.exists(save_path):
            mkdir(save_path)
        mod.location_factors_df.to_csv(f'{save_path}n_fact{mod.n_fact}.csv')

        save_path = path + 'cell_type_fractions_mean/'
        if not os.path.exists(save_path):
            mkdir(save_path)
        mod.cell_type_fractions.to_csv(f'{save_path}n_fact{mod.n_fact}.csv')

        if export_args['export_q05']:
            save_path = path + 'q05_param/'
            if not os.path.exists(save_path):
                mkdir(save_path)
            mod.location_factors_q05.to_csv(f'{path}location_factors_q05.csv')
            mod.cell_type_fractions_q05.to_csv(
                f'{path}cell_type_fractions_q05.csv')

        # A convenient way to explore the composition of cell type combinations / microenvironments is by using a heatmap:
        # make nice names
        mod.cell_type_fractions.columns = [
            sub('mean_cell_type_factors', '', i)
            for i in mod.cell_type_fractions.columns
        ]

        fig_path = path + 'cell_type_fractions_heatmap/'
        if not os.path.exists(fig_path):
            mkdir(fig_path)
        # plot co-occurring cell type combinations
        mod.plot_gene_loadings(
            mod.var_names_read,
            mod.var_names_read,
            fact_filt=mod.fact_filt,
            loadings_attr='cell_type_fractions',
            gene_fact_name='cell_type_fractions',
            cmap='RdPu',
            figsize=[5 + 0.12 * mod.n_fact, 5 + 0.1 * mod.n_var])

        save_plot(fig_path,
                  filename=f'n_fact{mod.n_fact}',
                  extension=export_args['plot_extension'])
        if verbose:
            plt.show()
        plt.close()

        ####### Plotting posterior of W / cell locations #######
        # Finally we need to examine where in the tissue each cell type combination / microenvironment is located
        rcParams["figure.figsize"] = [5, 6]
        rcParams["axes.facecolor"] = "black"
        if verbose:
            print('### Plotting cell combinations in 2D ###')

        data_samples = sp_data.obs[train_args['sample_name_col']].unique()
        cluster_plot_names = mod.location_factors_df.columns

        fig_path = path + 'spatial/'

        try:
            for s in data_samples:
                # if slots needed to generate scanpy plots are present, use scanpy:
                sc_spatial_present = np.any(
                    np.isin(list(sp_data.uns.keys()), ['spatial']))

                if sc_spatial_present:

                    sc.settings.figdir = fig_path

                    s_ind = sp_data.obs[train_args['sample_name_col']] == s
                    s_keys = list(sp_data.uns['spatial'].keys())
                    s_spatial = np.array(s_keys)[[s in i for i in s_keys]][0]

                    # plot cell density in each combination
                    sc.pl.spatial(
                        sp_data[s_ind, :],
                        cmap='magma',
                        library_id=s_spatial,
                        color=cluster_plot_names,
                        ncols=6,
                        size=export_args['scanpy_plot_size'],
                        img_key='hires',
                        alpha_img=0,
                        vmin=0,
                        vmax=export_args['scanpy_plot_vmax'],
                        save=
                        f"cell_density_mean_n_fact{mod.n_fact}_s{s}_{export_args['scanpy_plot_vmax']}.{export_args['plot_extension']}",
                        show=False)

                    if export_args['plot_histology']:
                        sc.pl.spatial(
                            sp_data[s_ind, :],
                            cmap='magma',
                            library_id=s_spatial,
                            color=cluster_plot_names,
                            ncols=6,
                            size=export_args['scanpy_plot_size'],
                            img_key='hires',
                            alpha_img=1,
                            vmin=0,
                            vmax=export_args['scanpy_plot_vmax'],
                            save=
                            f"cell_density_mean_n_fact{mod.n_fact}_s{s}_{export_args['scanpy_plot_vmax']}.{export_args['plot_extension']}",
                            show=False)

                else:

                    # if coordinates exist plot
                    if export_args['scanpy_coords_name'] is not None:
                        # move spatial coordinates to obs for compatibility with our plotter
                        sp_data.obs['imagecol'] = sp_data.obsm[
                            export_args['scanpy_coords_name']][:, 0]
                        sp_data.obs['imagerow'] = sp_data.obsm[
                            export_args['scanpy_coords_name']][:, 1]

                        p = c2lpl.plot_factor_spatial(
                            adata=sp_data,
                            fact_ind=np.arange(
                                mod.location_factors_df.shape[1]),
                            fact=mod.location_factors_df,
                            cluster_names=cluster_plot_names,
                            n_columns=6,
                            trans='log10',
                            max_col=100,
                            col_breaks=[0, 1, 10, 20, 50],
                            sample_name=s,
                            samples_col=train_args['sample_name_col'],
                            obs_x='imagecol',
                            obs_y='imagerow')
                        p.save(
                            filename=fig_path +
                            f"cell_density_mean_n_fact{mod.n_fact}_s{s}.{export_args['plot_extension']}"
                        )

        except Exception as e:
            print(
                'Some error in plotting with scanpy or `cell2location.plt.plot_factor_spatial()`\n '
                + str(e))

        rcParams["axes.facecolor"] = "white"
        matplotlib.rc_file_defaults()

        # save model object and related annotations
        save_path = path + 'models/'
        if not os.path.exists(save_path):
            mkdir(save_path)
        if export_args['save_model']:
            # save the model and other objects
            res_dict_1 = {
                'mod': mod,
                'model_name': model_name,
                'train_args': train_args,
                'posterior_args': posterior_args,
                'export_args': export_args,
                'run_name': run_name,
                'run_time': str(np.around(
                    (time.time() - start) / 60, 2)) + ' min'
            }
            pickle.dump(res_dict_1,
                        file=open(save_path + f'model_n_fact{mod.n_fact}.p',
                                  "wb"))

        else:
            # just save the settings
            res_dict_1 = {
                'model_name': model_name,
                'train_args': train_args,
                'posterior_args': posterior_args,
                'export_args': export_args,
                'run_name': run_name,
                'run_time': str(np.around(
                    (time.time() - start) / 60, 2)) + ' min'
            }
            pickle.dump(res_dict_1,
                        file=open(save_path + f'model_n_fact{mod.n_fact}.p',
                                  "wb"))

        res_dict[f'n_fact{mod.n_fact}'] = res_dict_1

        if verbose:
            print('### Done ### - time ' + res_dict_1['run_time'])  # run_time is stored in the per-n_fact dict res_dict_1

    save_path = path + 'anndata/'
    if not os.path.exists(save_path):
        mkdir(save_path)
    # save anndata with exported posterior
    sp_data.write(filename=f'{save_path}sp.h5ad', compression='gzip')

    if return_all:
        return res_dict, sp_data
    else:
        del res_dict
        del res_dict_1
        del mod
        gc.collect()
        return str((time.time() - start) / 60) + ' min'
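
The defaults-then-override loops at the top of run_colocation implement a common pattern; a compact equivalent using dict unpacking (a sketch, not the cell2location API):

def merge_with_defaults(user_args, defaults):
    return {**defaults, **user_args}  # keys supplied by the caller win

train_defaults = {'n_fact': [30], 'n_iter': 20000, 'n_restarts': 5}
print(merge_with_defaults({'n_iter': 500}, train_defaults))
# {'n_fact': [30], 'n_iter': 500, 'n_restarts': 5}
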
Code example #16
import numpy as np
import tensorflow as tf
import matplotlib as mpl

from tqdm import tqdm

from absl import app
from absl import flags

from matplotlib import pyplot as plt
from matplotlib import gridspec as grid

import includes.visualization as visualization
from includes.utils import load_data, Dataset

mpl.rc_file_defaults()

tf.logging.set_verbosity(tf.logging.ERROR)

FLAGS = flags.FLAGS

flags.DEFINE_string("model", "gumbolt-vae",
                    "Model to use [vae, dvae, gumbolt-vae, gvae]")
flags.DEFINE_string("datagroup", "mnist",
                    "Datagroup to use [mnist, spiral, graph]")
flags.DEFINE_string(
    "dataset", "static",
    "Dataset to use {mnist:[static], spiral:[normal], graph:[citeseer]}")

flags.DEFINE_integer("latent_dim", 10,
                     "Number of dimensions for latent variable Z")
Code example #17
def clear_state(plot_rcparams):
    plt.close('all')
    if hasattr(matplotlib, 'rc_file_defaults'):
        matplotlib.rc_file_defaults()
    if hasattr(matplotlib, 'rcParams'):
        matplotlib.rcParams.update(plot_rcparams)
Code example #18
File: MyPlot.py  Project: galdreiman/PAC
def set_rc_params(params):
    # Reset params from rc file if matplotlib installation supports it.
    if hasattr(matplotlib, 'rc_file_defaults'):
        matplotlib.rc_file_defaults()
    if params:
        matplotlib.rcParams.update(params)
Code example #19
def analyse_features(features, y, class_labels, conf, image_save_directory):
    '''
    Visualize and analyse features: scaling, correlation matrices,
    hierarchical linkage, parallel coordinates, t-SNE, UMAP and PCA.
    '''

    # Feature Visualization
    # Here, feature selection and visualization of the dataset are performed.
    # Methods: feature visualization through t-SNE; feature visualization
    # and analysis through PCA.

    ### Standardize Data for Feature Selection and Visualization
    # Z-normalize the data around zero and divide by the standard deviation.
    # Fit the normalizer on the training data and transform the training and
    # the test data. The scaler must depend only on the training data, in
    # order to prevent leakage of information from the test data (a minimal
    # scaler sketch follows this example).

    # === Remove Columns that are all the same
    features_reduced = features.loc[:, np.invert(unique_cols(features))]
    print("Reduce columns that are duplicated in terms of values.")
    features = features_reduced

    # === Select the best type of scaler ===#
    X_scaled = rescale_features(features)
    if y is not None:
        y_scaled = rescale_outcomes(conf, features, y)
        print("Merged features and outcomes to use in correlation matrix")
        total_values_scaled = X_scaled.join(y_scaled)

        plot_correlation_matrix2(conf, image_save_directory,
                                 total_values_scaled)

        plot_correlation_bar(X_scaled, conf, image_save_directory, y_scaled)
    else:
        total_values_scaled = X_scaled
        print("Only features will be used for correlations.")

    ### Feature and Outcomes Correlation Matrix
    plot_correlation_matrix(features, image_save_directory,
                            total_values_scaled)

    # FIXME: class names are not shown correctly
    plot_spearman_correlation_matrix(image_save_directory, total_values_scaled)

    #from tabulate import tabulate
    #print(tabulate(X_scaled, headers='keys', tablefmt='psql'))

    try:
        plot_hierarchical_linkage(X_scaled, conf, image_save_directory)
    except Exception:
        warnings.warn("Cannot execute hierarchical linkage")
        traceback.print_exc()

    ### Feature visualization with Parallel Coordinates
    # Select a random subset to visualize
    import random

    # Reduce the training set with the number of samples randomly chosen
    X_train_index_subset = sup.get_random_data_subset_index(1000, features)
    X_train_scaled_subset = X_scaled.iloc[X_train_index_subset, :]

    if y is not None:
        df_y = y_scaled = pd.DataFrame(
            data=y.reshape(-1, 1),
            index=features.index,
            columns=[conf['Common'].get('class_name')])
        total_values = features.join(df_y)
        print("Merged unscaled features and outcomes to use in the "
              "correlation matrix")
        y_train_subset = np.array(y[X_train_index_subset]).flatten()

        # Select column values to use in the correlation plot
        feature_plot = list(range(0, 10, 1))
        # cols = ['MA2Norm', 'MA50Norm', 'MA200Norm', 'MA400Norm', 'MA200NormDiff', 'MA400NormDiff']
        cols = total_values.columns[feature_plot]
        print(feature_plot)
        print(cols)

        comparison_name = conf['Common'].get('class_name')
        print("Class name: ", comparison_name)

        df_fv = total_values.iloc[X_train_index_subset, :]

        # Use parallel coordinates to visualize the classes and all features for plotting
        # https://plot.ly/python/parallel-coordinates-plot/
        # http://benalexkeen.com/parallel-coordinates-in-matplotlib/

        m.rc_file_defaults()  # Reset sns
        colors = ['#2e8ad8', '#cd3785', '#c64c00', '#889a00']
        plot_parallel_coordinates(df_fv, cols, colors, comparison_name, conf,
                                  image_save_directory)
        #plot_parallel_coordinates(X_train_index_subset, cols, comparison_name, total_values)
    else:
        y_train_subset = None
        warnings.warn(
            "No y value. Parallel coordinates will not be calculated.")

    #### t-SNE Parameter Grid Search
    #calibrate_tsne = False
    #if calibrate_tsne:
    #    find_tsne_parmeters(X_train_scaled_subset, y_train_scaled_subset, class_labels)

    # t-SNE plot
    plot_t_sne(X_train_scaled_subset, y_train_subset, class_labels,
               image_save_directory)

    ### UMAP Cluster Analysis
    plot_umap(X_scaled, class_labels, image_save_directory, y)

    ### PCA Analysis
    try:
        plot_pca(X_scaled, class_labels, image_save_directory, y)
    except Exception:
        warnings.warn("Cannot execute PCA")
        traceback.print_exc()
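
The no-leakage rule described in the comments of analyse_features (fit the scaler on training data only), as a minimal scikit-learn sketch; X_train and X_test here are placeholder arrays:

import numpy as np
from sklearn.preprocessing import StandardScaler

X_train = np.random.default_rng(0).normal(loc=5.0, size=(100, 3))
X_test = np.random.default_rng(1).normal(loc=5.0, size=(20, 3))

scaler = StandardScaler().fit(X_train)    # statistics come from the training split only
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)  # same statistics reused: nothing leaks from the test set
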
Code example #20
def model_performance(y_true,
                      y_preds,
                      artifact_path,
                      artifact_ext,
                      threshold,
                      show_plots=False):
    matplotlib.rc_file_defaults()
    y_preds_bin = np.where(y_preds > threshold, 1, 0)

    #ROC Curve
    plt.clf()
    plt.close()
    plt.figure()
    false_positive_rate, recall, thresholds = roc_curve(y_true, y_preds)
    roc_auc = auc(false_positive_rate, recall)
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.plot(false_positive_rate, recall, 'b', label='AUC = %0.3f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.ylabel('Recall')
    plt.xlabel('Fall-out (1-Specificity)')
    plt.savefig(os.path.join(artifact_path, f'roc_curve_{artifact_ext}.png'),
                bbox_inches="tight")

    if show_plots:
        plt.show()

    print('auc score:', roc_auc)

    #Classification Report
    print(classification_report(y_true, y_preds_bin))

    #Confusion Matrix
    plt.clf()
    plt.close()
    plt.figure()
    cm = confusion_matrix(y_true, y_preds_bin)
    labels = ['0', '1']
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm,
                xticklabels=labels,
                yticklabels=labels,
                annot=True,
                fmt='d',
                cmap="Blues",
                vmin=0.2)
    plt.title('Confusion Matrix')
    plt.ylabel('True Class')
    plt.xlabel('Predicted Class')
    plt.savefig(os.path.join(artifact_path,
                             f'confusion_matrix_{artifact_ext}.png'),
                bbox_inches="tight")

    if show_plots:
        plt.show()

    #Probability Histogram
    plt.clf()
    prob_hist = pd.Series(y_preds).hist(cumulative=True, bins=20)
    # savefig returns None; save directly without keeping a handle
    prob_hist.get_figure().savefig(os.path.join(
        artifact_path, f'hist_plot_{artifact_ext}.png'),
        bbox_inches="tight")

    return classification_report(y_true, y_preds_bin, output_dict=True)


##still need to test it...
#def feature_importance():
#    shap_values = shap.TreeExplainer(bst).shap_values(X_val)
#    shap.summary_plot(shap_values[1], X_val)
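
The ROC bookkeeping used in model_performance, reduced to the underlying scikit-learn calls (the values are the classic four-point example):

from sklearn.metrics import roc_curve, auc

fpr, tpr, thresholds = roc_curve([0, 0, 1, 1], [0.1, 0.4, 0.35, 0.8])
print(auc(fpr, tpr))  # 0.75
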
Code example #21
def analyze_timegraph(source, features, y, conf, image_save_directory):
    '''
    Analyse the time graph: ACF/PACF autocorrelation of the price, its
    moving average and its first difference, plus temporal feature
    correlations.
    '''

    from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
    from statsmodels.stats.diagnostic import acorr_ljungbox

    # Autocorrelation

    # Source: https://machinelearningmastery.com/gentle-introduction-autocorrelation-partial-autocorrelation/

    #### Autoregression Intuition

    # Consider a time series that was generated by an autoregression (AR)
    # process with a lag of k. We know that the ACF describes the
    # autocorrelation between an observation and another observation at a
    # prior time step that includes direct and indirect dependence
    # information. This means we would expect the ACF for the AR(k) time
    # series to be strong to a lag of k, and that the inertia of that
    # relationship would carry on to subsequent lag values, trailing off at
    # some point as the effect weakens. We know that the PACF only describes
    # the direct relationship between an observation and its lag. This would
    # suggest that there would be no correlation for lag values beyond k.
    # This is exactly the expectation of the ACF and PACF plots for an AR(k)
    # process.

    #### Moving Average Intuition

    # Consider a time series that was generated by a moving average (MA)
    # process with a lag of k. Remember that the moving average process is an
    # autoregression model of the time series of residual errors from prior
    # predictions. Another way to think about the moving average model is
    # that it corrects future forecasts based on errors made on recent
    # forecasts. We would expect the ACF for the MA(k) process to show a
    # strong correlation with recent values up to the lag of k, then a sharp
    # decline to low or no correlation. By definition, this is how the
    # process was generated. For the PACF, we would expect the plot to show a
    # strong relationship to the lag and a trailing off of correlation from
    # the lag onwards. Again, this is exactly the expectation of the ACF and
    # PACF plots for an MA(k) process.

    # If the autocorrelation function has a very long tail, the process is
    # not stationary. (A minimal ACF/PACF illustration follows this example.)

    m.rc_file_defaults()  # Reset sns

    # Here, the time graph is selected
    print(
        "Plot the total autocorrelation of the price. The dark blue values "
        "are the correlation of the price with the lag. The light blue cone "
        "is the confidence interval. If the correlation is > cone, the value "
        "is significant.")

    vis.plot_autocorrelation(np.log(source['Close']),
                             "OMXS30",
                             mode="acf",
                             lags=None,
                             xlim=None,
                             ylim=None,
                             image_save_directory=image_save_directory)

    vis.plot_autocorrelation(np.log(source['Close']),
                             "OMXS30_700_first",
                             mode="acf",
                             lags=None,
                             xlim=[0, 700],
                             ylim=None,
                             image_save_directory=image_save_directory)

    vis.plot_autocorrelation(np.log(source['Close']),
                             "OMXS30",
                             mode="pacf",
                             lags=200,
                             xlim=None,
                             ylim=None,
                             image_save_directory=image_save_directory)

    vis.plot_autocorrelation(np.log(source['Close']),
                             "OMXS30_first_10",
                             mode="pacf",
                             lags=50,
                             xlim=[0, 10],
                             ylim=None,
                             image_save_directory=image_save_directory)

    vis.plot_autocorrelation(features.MA200Norm,
                             "OMXS30_MA200",
                             mode="acf",
                             lags=None,
                             xlim=None,
                             ylim=None,
                             image_save_directory=image_save_directory)

    vis.plot_autocorrelation(features.MA200Norm,
                             "OMXS30_MA200_first_200",
                             mode="acf",
                             lags=None,
                             xlim=[0, 200],
                             ylim=None,
                             image_save_directory=image_save_directory)

    vis.plot_autocorrelation(features.MA200Norm,
                             "OMXS30_MA200",
                             mode="pacf",
                             lags=200,
                             xlim=None,
                             ylim=None,
                             image_save_directory=image_save_directory)

    # Plot difference between time values to see if the differences are stationary
    diff = pd.DataFrame(
        data=np.divide(source['Close'] -
                       source['Close'].shift(1), source['Close'])).set_index(
                           source['Date'])
    diff = diff.iloc[1:, :]
    fig = plt.figure(figsize=(15, 4))
    plt.plot(source['Date'].iloc[1:], diff)
    plt.grid()

    # Here, the time graph is selected
    print(
        "Plot the total autocorrelation of the price. The dark blue values are the correlation of the price with the lag. "
        +
        "The light blue cone is the confidence interval. If the correlation is > cone, the value is significant."
    )

    vis.plot_autocorrelation(diff,
                             "OMXS30_difference",
                             mode="acf",
                             lags=None,
                             xlim=[0, 50],
                             ylim=[-0.2, 0.2],
                             image_save_directory=image_save_directory)

    vis.plot_autocorrelation(diff,
                             "OMXS30_difference",
                             mode="pacf",
                             lags=100,
                             xlim=[0, 50],
                             ylim=[-0.2, 0.2],
                             image_save_directory=image_save_directory)

    #Plot temporal correlation
    X_scaled, y_scaled = rescale(conf, features, y)
    # Correlation graphs for temporal correlation
    vis.plot_temporal_correlation_feature(X_scaled,
                                          conf['Common'].get('dataset_name'),
                                          image_save_directory, source,
                                          y_scaled)
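
A minimal illustration of the AR intuition described in the comments above (statsmodels and matplotlib only; the AR(1) series is synthetic):

import numpy as np
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

rng = np.random.default_rng(0)
x = np.zeros(500)
for t in range(1, 500):           # AR(1): x_t = 0.8 * x_{t-1} + noise
    x[t] = 0.8 * x[t - 1] + rng.normal()

fig, (ax1, ax2) = plt.subplots(2, 1)
plot_acf(x, ax=ax1, lags=40)      # slow geometric decay for an AR process
plot_pacf(x, ax=ax2, lags=40)     # sharp cut-off after lag 1
plt.show()
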
Code example #22
def perform_feature_selection_algorithms(features, y, conf,
                                         image_save_directory):
    '''
    Perform feature selection with Lasso, tree-based selection, backward
    elimination and recursive feature elimination, and collect the selected
    feature subsets.
    '''

    # Scale
    # Use this scaler also for the test data at the end
    X_scaled = pd.DataFrame(
        data=StandardScaler().fit(features).transform(features),
        index=features.index,
        columns=features.columns)

    # Reduce the training set with the number of samples randomly chosen
    X_train_index_subset = sup.get_random_data_subset_index(1000, X_scaled)
    print("Scaled data with standard scaler.")
    print("Get a subset of 1000 samples.")

    relevantFeatureList = []
    selected_feature_list = pd.DataFrame()

    # Predict with logistic regression

    ### Lasso Feature Selection
    m.rc_file_defaults()  # Reset sns
    coefList = execute_lasso_feature_selection(X_scaled, y, conf,
                                               image_save_directory)
    selected_feature_list = selected_feature_list.append(
        pd.Series(name='Lasso', data=coefList))
    relevantFeatureList.extend(coefList)

    print(
        "Prediction of training data with logistic regression: {0:.2f}".format(
            predict_features_simple(X_scaled[coefList], y)))

    ### Tree based feature selection
    treecoefList = execute_treebased_feature_selection(X_scaled, y, conf,
                                                       image_save_directory)
    selected_feature_list = selected_feature_list.append(
        pd.Series(name='Tree', data=treecoefList))
    relevantFeatureList.extend(treecoefList)

    print(
        "Prediction of training data with logistic regression: {0:.2f}".format(
            predict_features_simple(X_scaled[treecoefList], y)))

    ### Backward Elimination
    # Backward Elimination - Wrapper method
    selected_features_BE = execute_backwardelimination_feature_selection(
        X_scaled, y)
    relevantFeatureList.extend(selected_features_BE)
    selected_feature_list = selected_feature_list.append(
        pd.Series(name='Backward_Elimination', data=selected_features_BE))

    print(
        "Prediction of training data with logistic regression: {0:.2f}".format(
            predict_features_simple(X_scaled[selected_features_BE], y)))

    ### Recursive Elimination with Logistic Regression
    # Recursive Elimination - Wrapper method, Feature ranking with recursive feature elimination
    relevant_features, rfe_coef = execute_recursive_elimination_feature_selection(
        X_scaled.iloc[X_train_index_subset], y[X_train_index_subset])
    relevantFeatureList.extend(relevant_features)

    step_size = np.round(len(X_scaled.columns) / 4, 0).astype(int)
    for i in range(step_size, len(X_scaled.columns), step_size):
        # Take the top i features from the RFE ranking
        top_i = pd.Series(name='RecursiveTop' + str(i), data=rfe_coef.iloc[0:i])
        selected_feature_list = selected_feature_list.append(
            top_i.reset_index()['RecursiveTop' + str(i)])
        print('Created RecursiveTop{}'.format(i))

    ### Add the top columns from all methods
    top_feature_cols = create_feature_list_from_top_features(
        relevantFeatureList)
    selected_feature_list = selected_feature_list.append(
        pd.Series(name='Manual', data=top_feature_cols))

    ### Add all columns
    selected_feature_list = selected_feature_list.append(
        pd.Series(name='All', data=X_scaled.columns))

    return selected_feature_list
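
A hedged usage sketch of the function above (features, y, conf and
image_save_directory are the pipeline objects defined elsewhere; the printed
index is illustrative):

    selected = perform_feature_selection_algorithms(features, y, conf,
                                                    image_save_directory)
    # One row of selected feature names per method, e.g.
    # ['Lasso', 'Tree', 'Backward_Elimination', 'RecursiveTop...', 'Manual', 'All']
    print(selected.index.tolist())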
Code example #26
0
File: plot_directive.py Project: QuLogic/cartopy
def clear_state(plot_rcparams, close=True):
    if close:
        plt.close("all")
    matplotlib.rc_file_defaults()
    matplotlib.rcParams.update(plot_rcparams)
Code example #27
0
File: models.py Project: clberube/BISIP
from os import makedirs, path

import pymc
from scipy.signal import argrelextrema

from bisip import invResults as iR
from bisip.utils import format_results, get_data
from bisip.utils import split_filepath, get_model_type
from bisip.utils import var_depth, flatten

try:
    import lib_dd.decomposition.ccd_single as ccd_single
    import lib_dd.config.cfg_single as cfg_single
    print("\nCCDtools available")
except ImportError:
    pass

import matplotlib as mpl
mpl.rc_file_defaults()

#==============================================================================
# Run an MCMC simulation on the selected model
# Arguments: model <function>, MCMC parameters <dict>, traces path <string>
def run_MCMC(function, mc_p, save_traces=False, save_where=None):
    print("\nMCMC parameters:\n", mc_p)
    if save_traces:
        # If path doesn't exist, create it
        if not path.exists(save_where):
            makedirs(save_where)
        MDL = pymc.MCMC(function, db='txt',
                        dbname=save_where)
    else:
        MDL = pymc.MCMC(function, db='ram',
                        dbname=save_where)
Code example #28
0
numSamples = features_raw.shape[0]
print("Number of samples={}".format(numSamples))

# Get number of features
numFeatures = features_raw.shape[1]
print("Number of features={}".format(numFeatures))

# Get the number of classes for the supervised learning
numClasses = outcomes_raw[conf['class_name']].value_counts().shape[0]
print("Number of classes={}".format(numClasses))

# ## Analyse and Transform time series

# In[17]:

m.rc_file_defaults()  # Reset seaborn styling

datatitle = conf['dataset_name']

plt.figure(num=None, figsize=(12.5, 7), dpi=80, facecolor='w', edgecolor='k')
plt.plot(source['Date'],
         source['Close'])  # For a scatter plot, add 'o' as the last parameter
plt.title(datatitle)
plt.xlabel("Timestamp")
plt.ylabel("Price")
plt.show()

plt.figure(num=None, figsize=(12.5, 7), dpi=80, facecolor='w', edgecolor='k')
plt.plot(source['Date'], np.log(source['Close']))
plt.title(datatitle + ' log transformed')
plt.xlabel("Timestamp")
Code example #29
0
matplotlib.use(arg, warn=False, force=True) # set the backend
matplotlib.get_backend()                    # name of the current backend

class matplotlib.RcParams(*args, **kwargs)  # class that stores rc parameters
    copy()
    find_all(pattern)
    validate                                # dict of validation functions
matplotlib.rcParams                         # the current parameters

matplotlib.rc_context(rc=None, fname=None)  # with rc_context(...): plot with temporary settings

matplotlib.rc(group, **kwargs)              # set rc parameters for a group
matplotlib.rc_file(fname)                   # load parameters from a file
matplotlib.rcdefaults()                     # restore the built-in defaults
matplotlib.rc_file_defaults()               # restore parameters from the default rc file

matplotlib.rc_params(fail_on_error=False)   # return parameters from the default rc file
matplotlib.rc_params_from_file(fname, fail_on_error=False, use_default_template=True)
matplotlib.matplotlib_fname()               # path to the active config file

matplotlib.interactive(b)                   # set interactive mode
matplotlib.is_interactive()                 # check whether interactive mode is on

####################  module style
import matplotlib.style
matplotlib.style.context(style, after_reset=False)
matplotlib.style.reload_library()
matplotlib.style.use(style)
matplotlib.style.library                    # dict of available styles
matplotlib.style.available                  # list of available styles
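
A minimal usage sketch of the rc machinery listed above (standard matplotlib
only; 'ggplot' is one of the style sheets bundled with matplotlib):

    import matplotlib
    import matplotlib.style
    import matplotlib.pyplot as plt

    # Temporary override: settings revert when the with-block exits.
    with matplotlib.rc_context({'lines.linewidth': 2.0, 'font.size': 12}):
        plt.plot([0, 1], [0, 1])

    matplotlib.rc('lines', linewidth=1.0)  # set one rc group
    matplotlib.rc_file_defaults()          # reload the user's matplotlibrc
    matplotlib.rcdefaults()                # fall back to built-in defaults
    print(matplotlib.matplotlib_fname())   # path to the active rc file

    matplotlib.style.use('ggplot')         # apply a named style sheet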
Code example #30
0
def main(
    n_jobs: int,
    batch_size: int,
    aht_max_number_of_nodes: int,
    alpha_coefficient: float,
    experiment_id: Union[str, int],
    overwrite_neighborhood: bool,
    filter_graphs_to_intersected_vertices: bool,
):
    filter_graphs_to_intersected_vertices = bool(
        filter_graphs_to_intersected_vertices)
    for dataset_path, max_reviews in tqdm(
            datasets, desc="Amazon datasets processing..."):
        for experiment_name in [
                experiment_name_enum.GERANI,
                experiment_name_enum.OUR_ALL_RULES,
                experiment_name_enum.OUR_TOP_1_RULES,
        ]:

            with mlflow.start_run(
                    experiment_id=experiment_id,
                    run_name=f"{experiment_name}-{dataset_path.stem}-{max_reviews}",
            ):
                mlflow.log_param("experiment_name", experiment_name)

                aspect_analysis = AspectAnalysis(
                    input_path=dataset_path.as_posix(),
                    output_path=settings.DEFAULT_OUTPUT_PATH /
                    dataset_path.stem,
                    experiment_name=experiment_name,
                    jobs=n_jobs,
                    batch_size=batch_size,
                    max_docs=max_reviews,
                    aht_max_number_of_nodes=aht_max_number_of_nodes,
                    alpha_coefficient=alpha_coefficient,
                )

                if experiment_name == experiment_name_enum.OUR_ALL_RULES:
                    aspect_analysis.our_pipeline()
                elif experiment_name == experiment_name_enum.GERANI:
                    aspect_analysis.gerani_pipeline()
                elif experiment_name == experiment_name_enum.OUR_TOP_1_RULES:
                    aspect_analysis.our_pipeline_top_n_rules_per_discourse_tree(
                    )
                else:
                    raise ValueError("Wrong experiment type")

                for conceptnet_graph_path in tqdm(
                        CONCEPTNET_GRAPH_TOOL_GRAPHS,
                        desc="Conceptnet graph analysis..."):

                    with mlflow.start_run(
                            experiment_id=experiment_id,
                            run_name=conceptnet_graph_path.stem,
                            nested=True,
                            # run_id=f'{experiment_id}-{conceptnet_graph_path.stem}'
                    ) as run_conceptnet:

                        mlflow.log_param("dataset_path", dataset_path)
                        mlflow.log_param("dataset_name", dataset_path.stem)
                        mlflow.log_param("method", experiment_name)
                        mlflow.log_param("max_docs", max_reviews)
                        mlflow.log_param("batch_size", batch_size)
                        mlflow.log_param("n_jobs", n_jobs)
                        mlflow.log_param("conceptnet_graph_path",
                                         conceptnet_graph_path)
                        mlflow.log_param("conceptnet_graph_name",
                                         conceptnet_graph_path.stem)
                        mlflow.log_param("aht_max_number_of_nodes",
                                         aht_max_number_of_nodes)
                        mlflow.log_param("alpha_coefficient",
                                         alpha_coefficient)

                        png_file_path = (
                            aspect_analysis.paths.experiment_path /
                            f"shortest_paths_correlation_{conceptnet_graph_path.stem}.png"
                        )

                        if png_file_path.exists() and not overwrite_neighborhood:
                            logger.info(
                                f"{png_file_path.as_posix()} already exists, skipping to the next setting."
                            )
                            mlflow.log_artifact(png_file_path.as_posix())
                        else:
                            df = prepare_hierarchies_neighborhood(
                                experiments_path=aspect_analysis.paths,
                                conceptnet_graph_path=conceptnet_graph_path,
                                filter_graphs_to_intersected_vertices=
                                filter_graphs_to_intersected_vertices,
                            )

                            logger.info(
                                f"Shortest Paths pairs - data frame: {len(df)}"
                            )
                            # Drop pairs with no path in either graph
                            df = df[~(df.shortest_distance_aspect_graph.isin(VALUES_TO_SKIP)
                                      | df.shortest_distance_conceptnet.isin(VALUES_TO_SKIP))]
                            # drop_duplicates returns a new frame, so reassign it
                            df = df.drop_duplicates(subset=["aspect_1", "aspect_2"])
                            mlflow.log_metric("number_of_shortest_paths",
                                              len(df))
                            logger.info(
                                f"Shortest Paths pairs - data frame, without missing paths and duplicates: {len(df)}"
                            )

                            mlflow.log_dict(
                                pd.DataFrame(
                                    df.shortest_distance_aspect_graph.
                                    value_counts()).to_dict(orient="index"),
                                "shortest_distance_aspect_graph_distribution.json",
                            )

                            mlflow.log_dict(
                                pd.DataFrame(
                                    df.shortest_distance_conceptnet.
                                    value_counts()).to_dict(orient="index"),
                                "shortest_distance_conceptnet_distribution.json",
                            )

                            df = df[df.shortest_distance_aspect_graph <= 6]

                            matplotlib.rc_file_defaults()
                            sns.set_style(style=None, rc=None)
                            fig, ax1 = plt.subplots()
                            sns_plot = sns.lineplot(
                                x=df.shortest_distance_aspect_graph,
                                y=df.shortest_distance_conceptnet,
                                ax=ax1,
                            )
                            ax2 = ax1.twinx()
                            df_aspect_graph_distance_distribution = pd.DataFrame(
                                df.shortest_distance_aspect_graph.value_counts())
                            df_aspect_graph_distance_distribution.reset_index(
                                inplace=True)
                            df_aspect_graph_distance_distribution.sort_values(
                                by="index", inplace=True)
                            sns.barplot(
                                x=df_aspect_graph_distance_distribution["index"],
                                y=df_aspect_graph_distance_distribution.shortest_distance_aspect_graph,
                                alpha=0.5,
                                ax=ax2,
                            )
                            logger.info(
                                f"Shortest Paths correlation figure will be saved in {png_file_path}"
                            )
                            df.sort_values(by="shortest_distance_conceptnet",
                                           inplace=True)
                            pearson_correlation = df.shortest_distance_aspect_graph.corr(
                                df.shortest_distance_conceptnet)
                            spearman_correlation = df.shortest_distance_aspect_graph.corr(
                                df.shortest_distance_conceptnet, method="spearman")
                            kendall_correlation = df.shortest_distance_aspect_graph.corr(
                                df.shortest_distance_conceptnet, method="kendall")
                            df_csv_path = (
                                aspect_analysis.paths.experiment_path /
                                "df.csv")
                            df.to_csv(df_csv_path.as_posix())
                            mlflow.log_artifact(df_csv_path.as_posix())
                            mlflow.log_metrics({
                                "pearson": pearson_correlation,
                                "spearman": spearman_correlation,
                                "kendall": kendall_correlation,
                            })
                            sns_plot.figure.savefig(png_file_path.as_posix())
                            plt.close()

                            mlflow.log_artifact(png_file_path.as_posix())
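
The twin-axis pattern used above, reduced to a self-contained sketch
(synthetic data, not from the original experiment; note that sns.barplot
treats its x values as categories placed at positions 0..n-1):

    import matplotlib
    import matplotlib.pyplot as plt
    import pandas as pd
    import seaborn as sns

    matplotlib.rc_file_defaults()  # drop any lingering seaborn styling
    df = pd.DataFrame({'x': [1, 1, 2, 2, 3], 'y': [2.0, 2.5, 3.0, 3.5, 5.0]})

    fig, ax1 = plt.subplots()
    sns.lineplot(x=df.x, y=df.y, ax=ax1)  # mean trend on the left axis

    counts = df.x.value_counts().sort_index()
    ax2 = ax1.twinx()                     # second y-axis sharing the x-axis
    sns.barplot(x=counts.index, y=counts.values, alpha=0.5, ax=ax2)

    fig.savefig('twinx_sketch.png')
    plt.close(fig)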
Code example #31
0
File: maps.py Project: HyeonJeongKim/tcrm
from __future__ import division

import numpy as np

import matplotlib
matplotlib.use("Agg", warn=False)
matplotlib.rc_file_defaults()

from matplotlib.figure import Figure
from mpl_toolkits.basemap import Basemap

import Utilities.colours 
from Utilities.smooth import smooth


class MapFigure(Figure, Basemap):

    def __init__(self):
        Figure.__init__(self)
        self.subfigures = []

    def add(self, data, xgrid, ygrid, title, levels, cbarlab, map_kwargs):
        self.subfigures.append((data, xgrid, ygrid, title, levels, cbarlab, map_kwargs))

    def labelAxes(self, axes, xlabel='Longitude', ylabel='Latitude'):
        axes.set_xlabel(xlabel, labelpad=20, fontsize='x-small')
        axes.set_ylabel(ylabel, labelpad=25, fontsize='x-small')


    def addGraticule(self, axes, mapobj, dl=10.):
        xmin = mapobj.llcrnrlon
Code example #32
0
def _reset_sns():
    sns.reset_orig()
    mpl.rc_file_defaults()
    mpl.rcParams['figure.dpi'] = 300
Code example #33
0
File: nbplots.py Project: jhunken/nb2plots
def render_figures(code,
                   code_path,
                   output_dir,
                   output_base,
                   config,
                   context=True,
                   function_name=None,
                   context_reset=False,
                   close_figs=False,
                   raises=None):
    """ Run plot code and save the hi/low res PNGs, PDF in `output_dir`

    Save the images under `output_dir` with file names derived from
    `output_base`.

    Parameters
    ----------
    code : str
        String containing code to run.
    code_path : str
        Path of file containing code.  Usually path to ``.rst`` file.
    output_dir : str
        Path to which to write output images from plots.
    output_base : str
        Prefix for filename(s) for output image(s).
    config : instance
        Sphinx configuration instance.
    context : {True, False}, optional
        If True, use persistent context (workspace) for executing code.
        Otherwise create new empty context for executing code.
    function_name : None or str, optional
        If not-empty str, name of function to execute after executing `code`.
    context_reset : {False, True}, optional
        If True, clear persistent context (workspace) for code.
    close_figs : {False, True}, optional
        If True, close all figures generated before our `code` runs.  False can
        be useful when building up a plot with several `code` blocks.
    raises : None or Exception, optional
        Exception class that code should raise, or None, for no exception.
    """
    # -- Parse format list
    default_dpi = {'png': 80, 'hires.png': 200, 'pdf': 200}
    formats = []
    plot_formats = config.nbplot_formats
    if isinstance(plot_formats, six.string_types):
        # A string means Sphinx < 1.3; split on ',' to mimic Sphinx 1.3 and
        # later, which always returns a list.
        plot_formats = plot_formats.split(',')
    for fmt in plot_formats:
        if isinstance(fmt, six.string_types):
            if ':' in fmt:
                suffix, dpi = fmt.split(':')
                formats.append((str(suffix), int(dpi)))
            else:
                formats.append((fmt, default_dpi.get(fmt, 80)))
        elif type(fmt) in (tuple, list) and len(fmt) == 2:
            formats.append((str(fmt[0]), int(fmt[1])))
        else:
            raise PlotError('invalid image format "%r" in nbplot_formats' %
                            fmt)

    # Build the output
    ns = plot_context if context else {}

    if context_reset:
        plt.close('all')
        matplotlib.rc_file_defaults()
        matplotlib.rcParams.update(config.nbplot_rcparams)
        plot_context.clear()

    close_figs = not context or close_figs

    # Get working directory for code execution
    if setup.config.nbplot_working_directory is not None:
        workdir = _check_wd(setup.config.nbplot_working_directory)
    elif code_path is not None:
        workdir = abspath(dirname(code_path))
    else:
        workdir = None

    if close_figs:
        plt.close('all')

    run_code(code,
             code_path,
             ns,
             function_name,
             workdir=workdir,
             pre_code=setup.config.nbplot_pre_code,
             raises=raises)

    images = []
    fig_managers = Gcf.get_all_fig_managers()
    for j, figman in enumerate(fig_managers):
        if len(fig_managers) == 1:
            img = ImageFile(output_base, output_dir)
        else:
            img = ImageFile("%s_%02d" % (output_base, j), output_dir)
        images.append(img)
        for format, dpi in formats:
            try:
                figman.canvas.figure.savefig(img.filename(format), dpi=dpi)
            except Exception:
                raise PlotError(traceback.format_exc())
            img.formats.append(format)

    return images
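
For reference, the parsing loop above accepts both 'suffix:dpi' strings and
(suffix, dpi) pairs; a hypothetical Sphinx conf.py value and its parsed form:

    nbplot_formats = ['png:80', ('hires.png', 200), 'pdf']
    # parsed by render_figures into:
    #   [('png', 80), ('hires.png', 200), ('pdf', 200)]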
Code example #34
0
def clear_state():
    plt.close('all')
    matplotlib.rc_file_defaults()
Code example #35
0
def set_rc_params(matplotlib_options):
    # Reset options from rc file.
    matplotlib.rc_file_defaults()
    if matplotlib_options:
        matplotlib.rcParams.update(matplotlib_options)
Code example #36
0
def set_rc_params(matplotlib_options):
    # Reset options from rc file if matplotlib installation supports it.
    if hasattr(matplotlib, 'rc_file_defaults'):
        matplotlib.rc_file_defaults()
    if matplotlib_options:
        matplotlib.rcParams.update(matplotlib_options)
Code example #37
0
def clear_state(plot_rcparams, close=True):
    if close:
        plt.close('all')
    matplotlib.rc_file_defaults()
    matplotlib.rcParams.update(plot_rcparams)
Code example #38
0
                fig = plt.figure()
                ax_w = fig.add_subplot(111)  # The big subplot
                #ax_w.set_aspect('equal', adjustable='box')

                plt.sca(ax_w)

                sns.set(font_scale=0.5)
                sns.heatmap(heatmap_wavelets,
                            cbar_kws={'label': 'Energy'},
                            ax=ax_w,
                            cmap='plasma')

                ax_w.set_xlim(0, len(chunk_sequence))
                plt.xticks(fontsize=3)
                plt.yticks(fontsize=3)

                #ax_w.set_title(description, fontweight='bold', fontsize=4)

                fig.tight_layout()

                output_png = '{}_{}_wavelet_type_{}.png'.format(
                    output, k, code_types[m])
                plt.savefig(output_png, dpi=2400, bbox_inches='tight')
                fig.clear()
                plt.close(fig)

                matplotlib.rc_file_defaults()

    print(datetime.datetime.now())
Code example #39
0
def clear_state(plot_rcparams):
    plt.close('all')
    matplotlib.rc_file_defaults()
    matplotlib.rcParams.update(plot_rcparams)
Code example #40
0
def clear_state(plot_rcparams):
    plt.close('all')

    if hasattr(matplotlib, 'rc_file_defaults'):
        matplotlib.rc_file_defaults()
        matplotlib.rcParams.update(plot_rcparams)