Example #1
import sys
import leverage_efficiency.base

def main():
    # Get the name of the config file
    config_file = leverage_efficiency.base.get_config_filename(sys.argv)

    # Extract the data from source data folder into common format
    import extract
    extract.main(config_file)

    # Update data with most recent values (optional)
    #import update       # This doesn't connect to the rest of the pipeline yet
    #update.main(config_file)

    # Calculate derived quantities like returns for input into calculations
    import transform
    transform.main(config_file)

    # Perform leverage efficiency calculations
    import analysis
    analysis.main(config_file)

    # Create figures
    import plots
    plots.main(config_file)

    # Create exact figures used in the paper
    import paper_plots
    paper_plots.main(config_file)

    # Create figures used in the EE lecture notes
    import lecture_plots
    lecture_plots.main(config_file)
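A hedged sketch of how this script might be wired up: `get_config_filename` is assumed to pull the config path from the command line (the stand-in below is illustrative, not the package's actual helper).

# Illustrative stand-in for leverage_efficiency.base.get_config_filename;
# the fallback file name is an assumption.
def get_config_filename(argv):
    return argv[1] if len(argv) > 1 else 'config.ini'

if __name__ == '__main__':
    main()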
Example #2
import os
from ConfigParser import SafeConfigParser  # Python 2; configparser on Python 3
# ensure_dir, _verify_infile, extractFeatures_given_gff,
# _filter_keepValidPairs, promi2 and label come from the surrounding codebase.

def main(f_config, gff_infile, outdir, has_mirna, make_plots):
    ensure_dir(outdir)

    cparser = SafeConfigParser()
    cparser.read(f_config)
    f_params = cparser.get('promi2', 'params')
    listoffeatures = cparser.get('promi2', 'features').split(',')
    labelfile = cparser.get('configs', 'labelfile')

    is_consider_corr = 'corr' in listoffeatures

    ## Make sure no chrM in infile
    _verify_infile(gff_infile)

    ## Extract features
    gff_allfeatures = extractFeatures_given_gff(f_config, gff_infile, outdir,
                                                has_mirna, is_consider_corr)

    ## Skip any TSS that lacks a partner miRNA
    gff_allfeatures = _filter_keepValidPairs(gff_allfeatures)

    ## Run Promirna
    fo_predictions = os.path.join(
        outdir, 'Predictions.%s.txt' % os.path.basename(gff_infile))
    promi2.promi2(f_params, listoffeatures, gff_allfeatures, fo_predictions)

    ## Label predictions
    fo_labelledpredictions = fo_predictions + '.label'
    label.main(fo_predictions, labelfile, fo_labelledpredictions)

    ## Generate plots
    if make_plots:
        import plots
        outdir_plt = os.path.join(outdir, 'plots')
        plots.main(fo_labelledpredictions, outdir_plt, f_config)

    return fo_labelledpredictions
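A hypothetical invocation of this entry point, with illustrative paths (not taken from the source):

fo_labelled = main('promi2.cfg', 'tss_candidates.gff',
                   outdir='promi2_out', has_mirna=True, make_plots=False)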
Example #3
def main():
    """        
    Clean temp folder and run emcee sampler. 
    When complete:
        - Save results in a .csv file
        - Generate a corner plot
        - Send SMS alert (optional)  
    """
    start = time.time()
    cleanTempFolder()

    def emceeSampler(params_list):
        """" 
        Run emcee sampler and check for convergence every n steps.  

        Parameters
        ----------
        params_list: list, float
            NOTE: This is a global variable, 
            imported from init_params.py (see imports list, line 63). 

        Returns
        ----------
        None  
        """
        def _prepEmcee(params_list):
            """Iniitalize walkers in a Gaussian ball around initial guess."""
            num_params = len(params_list)
            print("# of parameters emcee is fitting: {}".format(num_params))
            print("Initial parameter guesses:{}".format(params_list))
            params_list = np.reshape(params_list, (1, num_params))
            pos = params_list + 1e-4 * np.random.randn(n_walkers, num_params)
            nwalkers, ndim = pos.shape
            return nwalkers, ndim, pos

        def _createBackendFile():
            """Generate a .h5 backend file to save and monitor progress."""
            print(os.getcwd())
            backend_folder = os.path.join(os.getcwd(), "backend")
            datestamp = time.strftime("%Y%m%d-%H%M")
            filename = "backend-file-{}.h5".format(datestamp)
            backend = emcee.backends.HDFBackend(
                os.path.join(backend_folder, filename))
            return backend

        def _saveResults(backend, samples):
            datestamp = time.strftime("%Y%m%d-%H%M")
            #  Save samples in .csv file stamped with
            #  date and time the run was completed
            results_folder = os.path.join(os.getcwd(), "results")
            samples_filename = 'samples-{}.csv'.format(datestamp)
            np.savetxt(os.path.join(results_folder, samples_filename),
                       samples,
                       delimiter=',',
                       fmt='%e')
            #  Update backend file name to match
            #  the date and time of above .csv file
            backend_folder = os.path.join(os.getcwd(), "backend")
            filename = "backend-file-{}.h5".format(datestamp)
            os.rename(backend.filename, os.path.join(backend_folder, filename))

        def _runEmcee(backend, nwalkers, ndim, pos):
            """            
            Set up a pool process to run emcee in parallel. 
            Run the emcee sampler and check for convergence every n steps,
            where n is user-defined. 
            """
            backend.reset(nwalkers, ndim)
            index = 0
            autocorr = np.empty(max_iter)
            old_tau = np.inf

            #  Set up parallel processing
            with Pool(processes=n_processes) as pool:
                sampler = emcee.EnsembleSampler(
                    nwalkers,
                    ndim,
                    logProbability,
                    args=(x, y, yerr),
                    backend=backend,
                    moves=[
                        (emcee.moves.DEMove()),
                        (emcee.moves.DESnookerMove()),
                    ],
                    pool=pool)
                #  Run emcee
                for sample in sampler.sample(pos,
                                             iterations=max_iter,
                                             progress=True):

                    #print("log_prob = {} ".format(sampler.get_log_prob()))
                    #print("tau = {}".format(sampler.get_autocorr_time()))
                    #print("acceptance fraction = {} ".format(sampler.acceptance_fraction))

                    #  Check for convergence every "check_iter" steps
                    if sampler.iteration % check_iter:
                        continue
                    tau = sampler.get_autocorr_time(tol=0)
                    autocorr[index] = np.mean(tau)
                    index += 1
                    converged = np.all(tau * 100 < sampler.iteration)
                    converged &= np.all(np.abs(old_tau - tau) / tau < 0.01)
                    if converged:
                        break
                    old_tau = tau

            #  Flatten the chain to shape (nsteps * nwalkers, ndim) once
            #  sampling ends; extracting samples inside the loop would leave
            #  `samples` unbound if the very first convergence check passed.
            samples = sampler.get_chain(flat=True)
            return samples

        backend = _createBackendFile()
        nwalkers, ndim, pos = _prepEmcee(params_list)
        samples = _runEmcee(backend, nwalkers, ndim, pos)
        _saveResults(backend, samples)
        ##return samples

    #  Run emcee sampler code
    emceeSampler(params_list)

    # Plot samples, save in /results/plots folder
    corner_plot.main()

    #  Calculate runtime, send SMS alert (optional)
    end = time.time()
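The convergence check in `_runEmcee` follows the pattern from the emcee documentation's progress-monitoring example: stop once the chain is longer than 100 integrated autocorrelation times and the tau estimate has changed by less than 1%. A standalone version of that check, for reference (the function name is illustrative):

import numpy as np

def has_converged(tau, old_tau, iteration, length_factor=100, rel_tol=0.01):
    # The chain must be at least `length_factor` autocorrelation times long,
    # and the running tau estimate must have stabilized to within `rel_tol`.
    long_enough = np.all(tau * length_factor < iteration)
    stable = np.all(np.abs(old_tau - tau) / tau < rel_tol)
    return bool(long_enough and stable)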
Example #4
def mainjj():
	plots.main()
	crawl.main()
	main()
Example #5
def main(data_path):
    """This is basically the full streamlit application code.
    It is run after a small basic set-up and the successfull user
    authentication (see below).
    """
    data_loaded = helpers.load_preprocessed_data(data_path)
    date_list = helpers.get_filter_options_for_due_date(data_loaded, 24)
    max_date = helpers.return_max_date_string(data_loaded)

    filter_due_date = st.sidebar.selectbox("Auswahl Stichdatum:",
                                           options=date_list)

    actual_date = helpers.return_actual_date_string(filter_due_date, max_date)
    data_truncated_head = helpers.truncate_data_to_actual_date(
        data_loaded, actual_date)
    n_years = helpers.calculate_max_n_years_available(data_truncated_head)

    filter_result_dim = st.sidebar.selectbox(
        "Auswahl Resultatsdimension:",
        options=helpers.get_filter_options_for_result_dim(n_years),
    )

    data_truncated = helpers.truncate_data_n_years_back(
        data_truncated_head, actual_date, n_years)
    data_prepared = helpers.prepare_values_according_to_result_dim(
        data_truncated, filter_result_dim, actual_date)

    if filter_result_dim == "Monat":
        avg_bool = st.sidebar.checkbox("Ø-Werte pro aktive Konten",
                                       value=False)
    else:
        st.sidebar.text("[Ø-Werte nicht verfügbar]")
        avg_bool = False

    data_prepared_value = helpers.replace_monthly_values_with_avg(
        data_prepared, filter_result_dim, avg_bool)
    data_with_diff = helpers.calculate_diff_column(data_prepared_value)
    data_actual = helpers.create_df_with_actual_period_only(
        data_with_diff, actual_date)

    mandant_groups = helpers.get_filter_options_for_mandant_groups(data_actual)
    kpi_groups = helpers.get_filter_options_for_kpi_groups()

    # SIDEBAR

    filter_mandant = st.sidebar.selectbox("Auswahl Mandanten-Gruppe:",
                                          options=mandant_groups)
    filter_kpi_groups = st.sidebar.selectbox("Auswahl KPI-Gruppe:",
                                             options=kpi_groups)
    filter_display_mode = st.sidebar.radio(
        "Auswahl Gruppierung für Anzeige:",
        options=["nach Entität", "nach KPI"])

    # TODO: Filter for Product Dim is temporarily (?) disabled (fixed to "Produkt")
    filter_product_dim = "Produkt"
    # filter_product_dim = st.sidebar.radio(
    #     "Auswahl Produktsicht:", options=["Produkt", "Kartenprofil"]
    # )

    st.sidebar.markdown("---")
    st.sidebar.text("")
    st.sidebar.text(f"Datenstand:\n {max_date}")

    # UPPER FILTER OPTIONS MAIN PAGE

    data = helpers.filter_for_sidebar_selections_mandant(
        data_actual, filter_mandant)
    data = helpers.filter_for_sidebar_selections_kpi(data, filter_kpi_groups)

    # GENERATING OPTION FOR MAIN PAGE FILTERS

    entity_options = helpers.get_filter_options_for_entities(data)
    kpi_options = helpers.get_filter_options_for_kpi(data)

    # MAIN PAGE FILTERS

    filter_entity = st.multiselect("Select entities:",
                                   options=entity_options,
                                   default=["[alle]"])
    filter_kpi = st.multiselect("Select KPIs:",
                                options=kpi_options,
                                default=["[alle]"])

    st.write("")

    # FILTERING DATA ACCORDING TO CHOICES

    data = helpers.filter_for_entity_and_kpi(
        data,
        filter_entity=filter_entity,
        filter_kpi=filter_kpi,
    )

    # DISPLAY AND STYLING OF DATAFRAMES

    data_display = helpers.prepare_for_display(data, filter_display_mode)
    helpers.display_dataframes(data_display, filter_display_mode,
                               filter_product_dim, filter_mandant,
                               filter_entity, avg_bool)

    # DISPLAY STANDARD PLOT IF CONDITIONS ARE MET

    fig, df_plot = plots.main(data, data_truncated)
    if fig is not None:
        st.plotly_chart(fig)

    # EXCEL EXPORT

    excel = st.button("Download Excel")

    if excel:
        if fig is not None:
            download_data = df_plot
        else:
            download_data = downloads.style_for_export_if_no_plot(
                data, filter_display_mode)

        download_path = downloads.get_download_path()
        b64, href = downloads.export_excel(download_data, download_path)
        st.markdown(href, unsafe_allow_html=True)
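The docstring refers to set-up and user authentication happening outside this function; a sketch of that bottom-of-file wiring under stated assumptions (the password gate and the data path are illustrative, not the app's actual code):

import streamlit as st

def check_password():
    # Illustrative gate: compare a sidebar input against a deployed secret.
    entered = st.sidebar.text_input("Passwort:", type="password")
    return entered == st.secrets.get("password", "")

if check_password():
    main("data/preprocessed_data.pkl")  # illustrative path
else:
    st.stop()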
Example #6
def test_plots(tmp_path):
    # pytest's tmp_path fixture supplies a fresh pathlib.Path directory.
    plots.main(tmp_path)
Example #7
import os
import sys
from ConfigParser import SafeConfigParser  # Python 2; configparser on Python 3
# features, mirna_proximity, gff_unify_features, correlation, plots and the
# promi2/ensure_dir/random_string helpers come from the surrounding codebase.

def main(f_config, gff_cage, is_gff, outdir, make_plots):
    cparser = SafeConfigParser()
    cparser.read(f_config)

    in_bname = os.path.basename(gff_cage)

    if outdir is None:
        outdir = 'promi2_outdir_' + in_bname + '_' + random_string(6)
    ensure_dir(outdir, False)

    f_param = cparser.get('promi2', 'params')
    listoffeatures = cparser.get('promi2', 'features')
    listoffeatures = listoffeatures.split(',')
    is_consider_corr = 'corr' in listoffeatures
    if is_consider_corr:
        corrmethod = cparser.get('correlation', 'corrmethod')

    ## PART1: Feature extraction
    if not is_gff:
        ## feature extraction: cpg, cons, tata (features.py)
        outdir_seqfeatures = os.path.join(outdir, 'seqfeatures')
        ensure_dir(outdir_seqfeatures, False)

        gff_1kbfeatures = os.path.join(outdir_seqfeatures,
                                       'features_1kbseq.gff')

        f_fasta = cparser.get('genome', 'fasta')
        f_chromsizes = cparser.get('genome', 'chromsizes')
        d_phastcons = cparser.get('cons', 'phastcons')
        TRAP = cparser.get('tata', 'trap')
        f_psemmatrix = cparser.get('tata', 'psem')

        features.main(gff_cage, outdir_seqfeatures, f_fasta, f_chromsizes,
                      d_phastcons, TRAP, f_psemmatrix, gff_1kbfeatures)

        ## feature extraction: mirna_proximity (mirna_proximity.py)
        outdir_mprox = os.path.join(outdir, 'mprox')
        ensure_dir(outdir_mprox, False)

        gff_mirnaprox = os.path.join(outdir_mprox, 'features_mirnaprox.gff')

        gff_mirna = cparser.get('mirbase', 'gff2')

        mirna_proximity.main(gff_cage, gff_mirna, gff_mirnaprox)

        ## merge extracted features (gff_unify_features.py)
        gff_features = os.path.join(outdir, 'Features.1kb.mprox.' + in_bname)
        gff_unify_features.main(gff_1kbfeatures, gff_mirnaprox, 'mirna_prox',
                                '0', gff_features)

        if is_consider_corr:
            ## merge extracted features (gff_unify_features.py) after compute correlation
            gff_features_corr = os.path.join(
                outdir, 'Features.1kb.mprox.%s.%s' % (corrmethod, in_bname))

            outdir_corr = os.path.join(outdir, 'corr')

            m_mirna = cparser.get('correlation', 'srnaseqmatrix')
            m_tss = cparser.get('correlation', 'cageseqmatrix')

            gff_corr = correlation.main(gff_mirna, m_mirna, m_tss, corrmethod,
                                        outdir_corr)
            gff_unify_features.main(gff_features, gff_corr, 'corr', '0',
                                    gff_features_corr)

            gff_allfeatures = gff_features_corr
        else:
            gff_allfeatures = gff_features
    else:
        gff_allfeatures = gff_cage
        with open(gff_allfeatures) as f:
            l = f.readline().split('\t')
            if ':' not in l[7]:
                sys.exit('ERROR: this is not a features.gff formatted file')

    ## PART2: extract parameters & run promirna
    f_prediction = os.path.join(outdir, 'Predictions.' + in_bname + '.txt')
    print('COMPUTING: "%s"...' % f_prediction)
    promi2(f_param, listoffeatures, gff_allfeatures, f_prediction)

    ## PART3: plots
    if make_plots:
        plotdir = os.path.join(outdir, 'plots')
        ensure_dir(plotdir, False)
        plots.main(f_prediction, plotdir, f_config)
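`ensure_dir` and `random_string` are the repo's own helpers; minimal stand-ins under assumed semantics:

import os
import random
import string

def ensure_dir(path, overwrite=True):
    # Assumed semantics: create the directory if it does not exist
    # (the boolean flag presumably controls clobbering; ignored here).
    if not os.path.exists(path):
        os.makedirs(path)

def random_string(n):
    # n random alphanumeric characters, used above to keep output dirs unique.
    return ''.join(random.choice(string.ascii_letters + string.digits)
                   for _ in range(n))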
Example #8
# Copyright 2021 Alexander Huntley

# This file is part of Plots.

# Plots is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# Plots is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with Plots.  If not, see <https://www.gnu.org/licenses/>.

import plots
plots.main()