Esempio n. 1
0
def emissions(df_exp, df_ref, test_cfg, filename_student_test=''):
    '''
    Perform emissions test for each variable of dataframe df_exp

    The first row of df_exp is compared with the first row of df_ref
    whose column 'exp' equals 'emis_base_ref'.

    :param df_exp: dataframe containing data to test
    :param df_ref: reference dataframe, must contain a column 'exp'
    :param test_cfg: configuration of the test, only the attribute
            ``metric`` is read; it is used as name of the result column
    :param filename_student_test: not used by this test, kept so that
            the signature stays compatible with existing callers
    :return: dataframe with the columns 'variable' and test_cfg.metric
            (relative deviation in percent), sorted by ascending metric
    '''

    row_list_df = []

    # select base reference for emission
    df_ref = df_ref.loc[df_ref['exp'] == 'emis_base_ref']

    for var in df_exp.keys():
        # columns with 'exp' in their name carry labels, not data
        if 'exp' in var:
            continue
        log.debug("Emissions test for {}".format(var))

        ref_value = df_ref[var].iloc[0]
        abs_deviation = abs(df_exp[var].iloc[0] - ref_value)
        # normalise with the magnitude of the reference: dividing by a
        # negative reference would give a negative "deviation" and put
        # the variable at the wrong end of the sorted result
        rel_deviation = abs_deviation / abs(ref_value) * 100

        # append results for construction dataframe df_result
        dict1 = {'variable': var, test_cfg.metric: rel_deviation}
        row_list_df.append(dict1)

    # construction dataframe
    df_result = pd.DataFrame(row_list_df,
                             columns=['variable', test_cfg.metric])

    # sort by relative deviation (smallest first)
    df_result.sort_values(by=[test_cfg.metric], inplace=True)

    return (df_result)
Esempio n. 2
0
def download_ref_to_stages_if_required(f_pattern_ref, p_stages,
                                       f_vars_to_extract, test):
    '''
    Return the reference file to use, downloading it first if needed.

    A value of f_pattern_ref equal to paths.rootdir is treated as
    "no ref-file passed"; in that case the file is fetched with wget.

    :param f_pattern_ref: reference file given by the user, or
            paths.rootdir if none was given
    :param p_stages: directory in which the downloaded file is stored
    :param f_vars_to_extract: name of the csv file with the variables;
            the name of the file holding the ftp-link is derived from it
            ('.csv' -> '.txt', 'vars_' -> 'ftp_')
    :param test: name of the current test
    :return: f_pattern_ref unchanged, or the path of the downloaded file
    '''

    # a reference file was passed explicitly: nothing to download
    if f_pattern_ref != paths.rootdir:
        log.info('Using user-defined reference file for test '
                 '{}'.format(test))
        return f_pattern_ref

    log.info('Download reference file from ftp-server')

    # file containing the ftp-link, located in the folder of the test
    link_basename = f_vars_to_extract.replace('.csv', '.txt')
    link_basename = link_basename.replace('vars_', 'ftp_')
    link_file = utils.clean_path(os.path.join(paths.p_f_vars_proc, test),
                                 link_basename)

    # destination of the download
    downloaded_ref = os.path.join(p_stages, 'ftp_ref_pattern.nc')

    wget_cmd = ('wget --input-file={} '
                '--output-document={}'.format(link_file, downloaded_ref))
    log.debug('ftp-command: {}'.format(wget_cmd))
    utils.shell_cmd(wget_cmd, py_routine=__name__)

    return downloaded_ref
Esempio n. 3
0
def rmse(df_exp, test_cfg):
    '''
    Collect the value of each variable of df_exp in a result dataframe

    No computation is done here: the first row of every column of df_exp
    is taken over as value of the metric.

    :param df_exp: dataframe containing data to test
    :param test_cfg: configuration of the test, only the attribute
            ``metric`` is read; it is used as name of the result column
    :return: dataframe with the columns 'variable' and test_cfg.metric,
            sorted by ascending metric
    '''

    metric = test_cfg.metric
    rows = []

    for name in df_exp.keys():
        # columns with 'exp' in their name carry labels, not data
        if 'exp' not in name:
            log.debug("Rmse test for {}".format(name))
            rows.append({'variable': name, metric: df_exp[name].iloc[0]})

    # one row per variable, smallest metric first
    result = pd.DataFrame(rows, columns=['variable', metric])
    result.sort_values(by=[metric], inplace=True)

    return result
Esempio n. 4
0
def determine_actions_for_data_processing(exp, tests, p_stages, lforce):
    '''
    Find out which processing steps have to be (re)done for each test.

    :param exp: experiment name
    :param tests: iterable with the names of the tests
    :param p_stages: directory where the processing steps are stored
    :param lforce: if true, every step is redone regardless of the
            files already present in p_stages
    :return: dictionary with the keys 'standard_postproc' and
            'test_postproc', each mapping the test name to a boolean
            (True = step has to be executed)
    '''

    if lforce:
        log.warning('Redo all processing steps')

    redo_standard = {}
    redo_test = {}

    for test in tests:
        # standard post-processing: needed if its netCDF is missing
        nc_file = os.path.join(
            p_stages, 'standard_postproc_{}_{}.nc'.format(test, exp))
        redo_standard[test] = bool(lforce) or not os.path.isfile(nc_file)

        # test-specific post-processing: needed if its csv is missing
        # or if the step it depends on is redone
        csv_file = os.path.join(
            p_stages, 'test_postproc_{}_{}.csv'.format(test, exp))
        redo_test[test] = (redo_standard[test]
                           or not os.path.isfile(csv_file))

    actions = {'standard_postproc': redo_standard,
               'test_postproc': redo_test}

    log.debug('actions: {}'.format(actions))

    return actions
Esempio n. 5
0
def shell_cmd(cmd, py_routine, lowarn=False):
    """
    Send shell command through subprocess.Popen and returns a tuple
    (out_status, output)

    cmd        = command line; it is executed through the shell
    py_routine = name of the calling routine, only used in log messages
    lowarn     = True -> only a warning is written, no exit
                 (To use with caution!)

    returns:
        out_status = 0 if the command returned exit code 0, else 1
        output     = standard output of the command as string
    """

    # send cmd to be executed
    p = subprocess.Popen(cmd,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         universal_newlines=True)

    # gets the output of the cmd (waits for the command to finish)
    out, err = p.communicate()

    # report every failure in the status, not only the one with lowarn:
    # if log.error returns instead of stopping the program, a failed
    # command must not look like a successful one to the caller
    out_status = 0 if p.returncode == 0 else 1

    # check if cmd was executed properly
    if out_status != 0:
        log.debug("{} (shell_cmd): ERROR in the command: \n {}".format(
            py_routine, cmd))
        if lowarn:
            log.warning("Shell command failed, but explicitly "
                        "keep program alive: \n {}".format(err))
        else:
            # NOTE(review): presumably the project logger exits here
            # (see description of lowarn) - confirm
            log.error("Error returned: {}".format(err))

    return (out_status, str(out))
Esempio n. 6
0
def timeser_proc_nc_to_df(exp, filename, p_stages, already_a_timeseries=False):
    '''
Convert a netCDF file into a dataframe and store it as csv file

Arguments:
    exp      = experiment name
    filename = filename of the netCDF returned by function standard_postproc
    p_stages = directory where processing steps are stored
    already_a_timeseries = if True, filename is used as it is and the
                           CDO-processing step is skipped

returns:
    dataframe with processed data for welchstest
    '''

    test = 'welch'

    if not already_a_timeseries:
        # the timeseries is written relative to the current directory
        timeser_filename = 'test_postproc_{}_{}.nc'.format(test, exp)
        cdo_cmd = 'cdo -L yearmean -fldmean -vertsum {} {}'.format(
            filename, timeser_filename)
        utils.shell_cmd(cdo_cmd, py_routine=__name__)
    else:
        log.debug('Skipping CDO-processing step')
        timeser_filename = filename

    # list of variables in the timeserie netcdf
    # file to drop (not to put into the dataframe)
    vars_to_drop = []

    log.info('Processing netCDF: {}'.format(timeser_filename))

    # open dataset; the context manager closes the file again once the
    # data has been copied into the dataframe
    with xr.open_dataset(timeser_filename) as dataset:
        data = dataset

        # Delete variables
        # useless variable time_bnds
        if ('time_bnds' in data.keys()):
            data = data.drop('time_bnds')
        # 3D vars; drop returns a new object, so the result has to be
        # kept, otherwise the call has no effect
        if len(vars_to_drop) > 0:
            data = data.drop(labels=vars_to_drop)

        # removed degenerated dimensions
        data = data.squeeze(drop=True)

        # transforms into dataframe
        df_data = data.to_dataframe()

    # export in a file
    os.makedirs(p_stages, exist_ok=True)
    csv_filename = os.path.join(p_stages,
                                'test_postproc_{}_{}.csv'.format(test, exp))
    df_data.to_csv(csv_filename, index=None, header=True, sep=';')
    log.info('CSV file can be found here: {}'.format(csv_filename))

    log.info('Finished {} for file {}'.format(__name__, timeser_filename))

    return (df_data)
Esempio n. 7
0
def normalize_data(dataset):
    '''
    Normalize the fields of a netCDF file with a sequence of cdo calls.

    The field mean is subtracted from the data and the result is divided
    by the field standard deviation. All intermediate files are written
    next to the input file and are not removed afterwards.

    :param dataset: filename of the netCDF to normalize
    :return: filename of the normalized netCDF
             (dataset without '.nc' + '_normalized.nc')
    '''

    log.info('Normalize fields in {} with mean and '
             'standard deviation'.format(dataset))

    # names of all files written by this function
    stem = dataset.replace('.nc', '')
    std_data = '{}_std.nc'.format(stem)
    std_data_enlarged = '{}_std_enlarged.nc'.format(stem)
    mean_data = '{}_mean.nc'.format(stem)
    mean_data_enlarged = '{}_enlarged.nc'.format(stem)
    sub_data = '{}_sub.nc'.format(stem)
    normalized_data = '{}_normalized.nc'.format(stem)

    # leftovers of a previous call are removed; rm fails if a file
    # does not exist, therefore only a warning is requested
    log.debug('Clean intermediate files for normalization')
    leftovers = (std_data, mean_data, std_data_enlarged, mean_data_enlarged,
                 sub_data, normalized_data)
    utils.shell_cmd('rm {} {} {} {} {} {}'.format(*leftovers),
                    py_routine=__name__,
                    lowarn=True)

    # the commands depend on the output of the previous ones,
    # they have to be executed in this order
    cdo_steps = [
        'cdo -L fldstd {} {}'.format(dataset, std_data),
        'cdo -L fldmean {} {}'.format(dataset, mean_data),
        'cdo -L -enlarge,{} {} {}'.format(dataset, mean_data,
                                          mean_data_enlarged),
        'cdo -L -enlarge,{} {} {}'.format(dataset, std_data,
                                          std_data_enlarged),
        'cdo -L sub {} {} {}'.format(dataset, mean_data_enlarged, sub_data),
        'cdo -L div {} {} {}'.format(sub_data, std_data_enlarged,
                                     normalized_data),
    ]
    for cdo_cmd in cdo_steps:
        utils.shell_cmd(cdo_cmd, py_routine=__name__)

    return normalized_data
Esempio n. 8
0
def welch_test(df_a, df_b, filename_student_test=''):
    '''
    Perform Welch's t-test for each variable of dataframe df_b
    :param df_a: reference dataframe, containing big sample
    :param df_b: dataframe containing data to test
    :param filename_student_test: filename for writing the result
            of the t-test into a csv file; nothing is written if empty
    :return: dataframe with the columns 'variable', 't-value' and
            'p-value', sorted by ascending p-value
    '''

    column_names = ['variable', 't-value', 'p-value']
    rows = []

    for name in df_b.keys():
        # columns with 'exp' in their name carry labels, not data
        if 'exp' in name:
            continue
        log.debug("Welch's t-test for {}".format(name))

        # unequal variances (Welch), missing values are ignored
        t_value, p_value = stats.ttest_ind(df_a[name],
                                           df_b[name],
                                           equal_var=False,
                                           nan_policy='omit')

        rows.append(dict(zip(column_names, (name, t_value, p_value))))

    # one row per variable, smallest p-value first
    df_result = pd.DataFrame(rows, columns=column_names)
    df_result.sort_values(by=['p-value'], inplace=True)

    # write the result only if a filename is given
    if len(filename_student_test) != 0:
        log.info('Write result to {}'.format(filename_student_test))
        df_result.to_csv(filename_student_test, sep=',')

    return df_result
Esempio n. 9
0
def plt_welchstest(df_tot, new_exp, df_result, p_stages=paths.p_stages):
    '''
    Plot mean and standard deviation per experiment for each variable
    of df_result into a multipage pdf file (3 x 3 plots per page).

    :param df_tot:  Dataframe containing all
        global annual mean (reference & new_exp)
    :param new_exp:   Name of the new exp which is analysed
    :param df_result: Dataframe containing the results of the Welch's test;
        the columns 'variable', 'p-value' and 'col-graph' are read
    :param p_stages : path to save the figures

    :return: None, but the figure is saved in p_stages
        as glob_means_[new_exp].pdf
    '''

    # simple statistics, sort by exp
    #  to be sure the order is the same in both dataframe
    df_tot_mean = df_tot.groupby(['exp']).mean()\
        .sort_values(['exp']).reset_index()

    # for std, the panda std has a bug
    # cf https://github.com/pandas-dev/pandas/issues/16799
    df_tot_std = df_tot.groupby(['exp']).std()\
        .sort_values(['exp']).reset_index()

    # ensure new exp to be the last line
    iexp = df_tot_mean.index[df_tot_mean['exp'] == new_exp]
    new_order = df_tot_mean.index.drop(iexp).append(iexp)
    df_tot_mean = df_tot_mean.reindex(new_order)
    df_tot_std = df_tot_std.reindex(new_order)

    # number col/rows per page
    nlin = 3
    ncol = 3
    nplot = nlin * ncol

    # needed for multipage pdf file
    filename_mean_std_figures = 'glob_means_{}.pdf'.format((new_exp))
    p_pdf_file_var = os.path.join(p_stages, filename_mean_std_figures)
    pp = PdfPages(p_pdf_file_var)

    # page which is currently filled; None if there is no unsaved page
    fig = None

    # loop over all variables
    for ivar, var in enumerate(df_result.variable):
        log.debug('Create plot for {}'.format(var))

        # subplot preparation
        # --------------------------------------------------------------------
        # number of plot on the current page
        iplot = ivar % nplot

        # set the plotting frame (start of a new page)
        if iplot == 0:
            fig, plt_nbr = plt.subplots(nlin,
                                        ncol,
                                        sharex='col',
                                        figsize=(12, 12))

        # subplot coordinate (plain integers, np.int does not exist
        # anymore in recent versions of numpy)
        ilin, icol = divmod(iplot, ncol)

        # actual plot
        act_plt = plt_nbr[ilin, icol]

        # x-axis
        xaxis = np.arange(df_tot_mean.shape[0])

        # plotting
        # --------------------------------------------------------------------
        nmisval = df_tot_mean[var].isna().sum()
        i_newexp = len(xaxis) - nmisval - 1
        # define colors: one entry per experiment, all of them black
        # (assigning the string 'k' to a slice would shorten the list)
        colors = len(xaxis) * ['k']
        # define thickness, the new experiment is drawn thicker
        thickness = len(xaxis) * [1.5]
        thickness[i_newexp] = 3

        # plot mean and std for each variable
        act_plt.errorbar(xaxis, df_tot_mean[var], yerr=df_tot_std[var],
                         fmt='+k', ecolor=colors, elinewidth=thickness)

        # plot average reference experiments (grey band)
        m_ref = df_tot[df_tot.exp != new_exp][var].mean()
        s_ref = df_tot[df_tot.exp != new_exp][var].std()
        act_plt.axhline(m_ref, c='k')
        act_plt.fill_between([-1, max(xaxis) - 0.5],
                             m_ref - s_ref, m_ref + s_ref,
                             facecolor='grey', alpha=0.6)

        # plot color background
        color_graph = df_result.loc[df_result.variable == var]['col-graph']\
            .values[0]
        act_plt.set_facecolor('{}'.format(color_graph))

        # manage labels/title/etc
        # --------------------------------------------------------------------
        # label settings
        act_plt.xaxis.set_ticks(xaxis)
        act_plt.set_xticklabels(df_tot_mean['exp'], rotation=90)

        # title settings (first matching row; float() on a whole
        # Series is deprecated in pandas)
        pvalue = float(
            df_result.loc[df_result.variable == var, 'p-value'].iloc[0])
        act_plt.set_title('{}, p-value = {:.2%}'.format(var, pvalue))

        # Saving page
        # --------------------------------------------------------------------
        # save full page and release the figure
        if iplot == (nplot - 1):
            pp.savefig(fig)
            plt.close(fig)
            fig = None

    # save the last page only if it has not been saved in the loop
    # (a full last page would end up twice in the pdf otherwise)
    if fig is not None:
        pp.savefig(fig)
        plt.close(fig)

    # close pdf file
    pp.close()

    log.info('Detailed plots of mean and standard deviation per variable '
             'can be found in the file {}'.format(p_pdf_file_var))
Esempio n. 10
0
def main(exp,
         tests,
         p_stages=paths.p_stages,
         p_ref_csv_files=paths.p_ref_csv_files,
         ltestsuite=False,
         lverbose=False):
    '''
    Add the results of an experiment to the pool of references and
    commit them on a new git branch 'test_add_[exp]'.

    :param exp: name of the experiment to add
    :param tests: iterable with the names of the tests
    :param p_stages: directory containing the processed data of exp
    :param p_ref_csv_files: directory of the reference pool
    :param ltestsuite: if True, no user input is requested, no file is
        copied and no git command is executed
    :param lverbose: not used in this function
    :return: empty tuple
    '''

    # only needed here to quote arguments of the git commands
    import shlex

    # initialisation
    new_branch_name = 'test_add_{}'.format(exp)
    files_to_commit = []

    # fill up file 'Exps_description.csv' with additional
    # information via user input
    f_exp_descr = os.path.join(p_ref_csv_files, 'Exps_description.csv')
    if not ltestsuite:
        add_line_descr_f(exp=exp, f_exp_descr=f_exp_descr)
    files_to_commit.append(f_exp_descr)

    for test in tests:
        test_cfg = get_config_of_current_test(test)

        csv_file = utils.clean_path(
            p_stages, 'test_postproc_{}_{}.csv'.format(test, exp))

        # what is the filename in the reference pool
        filename_in_ref_dir = '{}_{}.csv'.format(test_cfg.ref_name, exp)
        # what is the location to store that file
        place_for_reference = os.path.join(p_ref_csv_files, test,
                                           filename_in_ref_dir)

        log.debug('Copy {} to {}'.format(csv_file, place_for_reference))

        if not ltestsuite:
            shutil.copy(csv_file, place_for_reference)

        files_to_commit.append(place_for_reference)

        # copy pdf with bar-plots from Welch's-test
        if test == 'welch':

            pdf_file = utils.clean_path(
                p_stages, '{}_{}.pdf'.format(test_cfg.ref_name, exp))

            # what is the name of the pdf in the reference pool
            filename_in_ref_dir = '{}_plots.pdf'.format(test_cfg.ref_name)
            # what is the location to store that file
            place_for_reference = os.path.join(p_ref_csv_files, test,
                                               filename_in_ref_dir)

            # the file copied here is the pdf, not the csv
            log.debug('Copy {} to {}'.format(pdf_file, place_for_reference))
            files_to_commit.append(place_for_reference)

            if not ltestsuite:
                shutil.copy(pdf_file, place_for_reference)

    # root is important to not fail during git commands
    os.chdir(paths.rootdir)

    # checkout new branch
    if not ltestsuite:
        log.info('Create and checkout new branch {}'.format(new_branch_name))
        git_cmd = 'git checkout -B {}'.format(new_branch_name)
        utils.shell_cmd(git_cmd, py_routine='add_exp_to_ref.py')

        # commit all modified files prior in the function to git;
        # paths are quoted so that blanks do not split the argument
        for f_commit in files_to_commit:
            git_cmd = 'git add {}'.format(shlex.quote(f_commit))
            log.debug(git_cmd)
            utils.shell_cmd(git_cmd, py_routine=__name__)

        log.debug('Commit files {}'.format(files_to_commit))
        commit_message = input('Please type your commit message :')
        # the message is free text: quote it, otherwise a double quote
        # or a $ typed by the user changes the command itself
        git_cmd = 'git commit -m {}'.format(shlex.quote(commit_message))
        utils.shell_cmd(git_cmd, py_routine=__name__)

    # Finish
    log.info(
        Style.GREEN(
            'Files are added in the new branch: '
            '{} in your local git repository.'.format(new_branch_name)))
    log.info('To add the file to the official repository, '
             'please perform the following steps:')
    log.info('1. Push the new branch into the official repo:')
    log.info('   git push --set-upstream origin {}'.format(new_branch_name))
    log.info('2. On the Open Web interface (GitHub) , open a Pull Request.')

    log.banner('End add_exp_to_ref for experiment {}'.format(exp))
    return ()
Esempio n. 11
0
def standard_postproc(exp, test, spinup, p_raw_files, raw_f_subfold, p_stages,
                      f_vars_to_extract):
    '''
Perform standard post-processing using cdo

Arguments:
    exp            = experiment name
    test           = name of current test to process data
    spinup         = number of files (from beginning of simulation)
                     to ignore due to model spinup
    p_raw_files    = path to raw model output
    raw_f_subfold  = subfolder in p_raw_files with model output
                     [p_raw_files]/[exp]/[raw_f_subfold]
    p_stages       = directory where processing steps are stored
    f_vars_to_extract =  csv file containing the variables to proceed
                         (columns 'var', 'formula' and 'file' are read)

returns:
    tuple (ofile_tot, skip_next_steps)
    ofile_tot       = netCDF filename containing the fields as defined
                      in f_vars_to_extract
    skip_next_steps = True only if the data was taken from the
                      alternative folder for test 'welch' and is
                      already converted into a dataframe
    '''

    log.info('Postprocess data using CDO for test {}'.format(test))

    # check that exp is defined
    if exp is None:
        log.error('Experiment is not defined.\n exp = {}'.format(exp))

    # get variables to process:
    p_test_vars_proc = os.path.join(paths.p_f_vars_proc, test)
    full_p_f_vars = utils.clean_path(p_test_vars_proc, f_vars_to_extract)
    df_vars = pd.read_csv(full_p_f_vars, sep=',')

    # define expressions
    df_vars['expr'] = df_vars['var'] + '=' + df_vars['formula']

    # name of output file
    ofile_tot = os.path.join(p_stages,
                             'standard_postproc_{}_{}.nc'.format(test, exp))

    # initialisation
    files_error = []  # list files giving error
    files_proceed = []  # list of files where data are collected

    # sometimes data is stored in a folder called Raw
    p_raw_folder = os.path.join(p_raw_files, exp, raw_f_subfold)

    # SPECIAL CASE, echam specific :
    # if the folder containing the Raw files have been deleted,
    # but folder 'Data' contains already global annual means
    if not os.path.isdir(p_raw_folder):
        log.warning('The folder containing the raw data '
                    'has been deleted : {}'.format(p_raw_folder))

        p_altern_timeser_fold = os.path.join(p_raw_files, exp, 'Data')

        # pattern of the files to look for depends on the test
        if test == 'welch':
            altern_pattern = 'timeser_daint_*.nc'
        elif test in ('fldcor', 'rmse'):
            altern_pattern = 'multi_annual_means_*.nc'
        elif test == 'emi':
            altern_pattern = 'emi_*.nc'
        else:
            altern_pattern = None

        # for an unknown test there is nothing to look for; the empty
        # list leads to the error message below instead of a NameError
        if altern_pattern is None:
            time_series_altern_fold = []
        else:
            time_series_altern_fold = glob.glob(
                os.path.join(p_altern_timeser_fold, altern_pattern))

        if len(time_series_altern_fold) < 1:
            # report the directory which was searched
            log.error('Could not find files in alternative directory '
                      '{}'.format(p_altern_timeser_fold))
        else:
            log.info('The alternative folder has been found instead: '
                     '{}'.format(p_altern_timeser_fold))

            log.warning('This section of code is only tested for ECHAM! '
                        'It is not recommended to use it for other cases')

            if len(time_series_altern_fold) == 1:
                index_ts = 0
            else:
                # several candidates: let the user choose
                for (i, item) in enumerate(time_series_altern_fold):
                    print(i, item)
                index_ts = int(
                    input('Please type the index of the file'
                          ' to use (negative means '
                          'none of them) : '))

            # If index positive, copy the time serie and exit
            if index_ts >= 0:
                log.info('File used : {}'.format(
                    time_series_altern_fold[index_ts]))

                cdo_cmd = ('cdo -L -chname,CDNC,burden_CDNC '
                           '-chname,ICNC,burden_ICNC '
                           '-chname,SCF,SCRE -chname,LCF,LCRE '
                           '{} {}'.format(time_series_altern_fold[index_ts],
                                          ofile_tot))
                utils.shell_cmd(cdo_cmd, py_routine=__name__)

                # convert netCDF to dataframe,
                # therefore skip next processing step
                if test == 'welch':
                    timeser_proc_nc_to_df(exp,
                                          ofile_tot,
                                          p_stages,
                                          already_a_timeseries=True)
                    skip_next_steps = True
                else:
                    skip_next_steps = False

                log.warning('Leave ECHAM-only code-section! '
                            'You are save again...')
                return (ofile_tot, skip_next_steps)

    # NORMAL CASE
    else:
        log.info('Analyse files in : {}'.format(p_raw_folder))

    log.banner('Time for a coffee...')

    # loop over output stream
    for stream in df_vars['file'].unique():

        # extract all lines with file f
        df_file = df_vars[df_vars.file == stream]

        # list all available files in p_raw_files/exp/raw_f_subfold
        # which have stream f
        # restart files and {}m.format(stream) e.g. echamm.nc
        # files are not considered
        final_p_raw_files = os.path.join(p_raw_folder,
                                         '*_*{}*.nc'.format(stream))
        excluded_tags = ('stream', '{}m'.format(stream))
        ifiles = [
            fn for fn in glob.glob(final_p_raw_files)
            if not any(tag in os.path.basename(fn) for tag in excluded_tags)
        ]
        if len(ifiles) == 0:
            log.warning('No raw files found for stream {} at address : \n'
                        '{}'.format(stream, final_p_raw_files))

        # sort files in chronological order
        # (this will be needed for doing yearmean properly)
        ifiles.sort()

        print_statistics_of_raw_files(ifiles, stream, exp)

        # remove spin-up files
        log.info('Remove first {} months of data '
                 'due to model spinup'.format(spinup))
        ifiles = ifiles[int(spinup):]

        # output file for stream f
        ofile_str = '{}_{}.nc'.format(exp, stream)

        # variables to extract from netcdf
        # files (this is needed for optimization)
        variables = variables_to_extract(vars_in_expr=df_file.formula.values)

        # Extract variables needed from big files
        log.info('Extract variables from file: {}'.format(stream))

        # initialization
        tmp_selvar_files = []  # list to store the ifiles

        for ifile in ifiles:
            # basename of ifile
            ifile_bsn = os.path.basename(ifile)
            log.debug('File {}'.format(ifile_bsn))
            tmp_selvar_file = 'tmp_extract_{}'.format(ifile_bsn)

            cdo_cmd = 'cdo selvar,{} {} {}'.format(','.join(variables), ifile,
                                                   tmp_selvar_file)
            # a file which cannot be read is skipped and reported at
            # the end, it does not stop the processing
            out_status, out_mess = utils.shell_cmd(cdo_cmd,
                                                   py_routine=__name__,
                                                   lowarn=True)

            if out_status == 0:
                tmp_selvar_files.append(tmp_selvar_file)
            else:
                files_error.append(ifile_bsn)

        # Merge all the monthly files together
        log.info('Copy {} files'.format(stream))
        tmp_merged = 'tmp_{}_{}.nc'.format(exp, stream)
        if os.path.isfile(tmp_merged):
            os.remove(tmp_merged)

        cdo_cmd = 'cdo -copy {} {}'.format(' '.join(tmp_selvar_files),
                                           tmp_merged)
        utils.shell_cmd(cdo_cmd, py_routine=__name__)

        # compute needed variables
        log.info('Compute variables for file : {}'.format(stream))
        if os.path.isfile(ofile_str):
            os.remove(ofile_str)

        expr_str = ';'.join((df_file.expr.values))
        cdo_cmd = 'cdo -L -setctomiss,-9e+33 -expr,"{}" {} {}'.format(
            expr_str, tmp_merged, ofile_str)
        utils.shell_cmd(cdo_cmd, py_routine=__name__)

        # keep trace of output file per stream
        files_proceed.append(ofile_str)

        # cleaning
        for tmp_file in tmp_selvar_files:
            os.remove(tmp_file)
        os.remove(tmp_merged)

    # merge all stream files
    if os.path.isfile(ofile_tot):
        os.remove(ofile_tot)
    cdo_cmd = 'cdo merge {} {}'.format(' '.join(files_proceed), ofile_tot)
    utils.shell_cmd(cdo_cmd, py_routine=__name__)

    for stream_file in files_proceed:
        os.remove(stream_file)

    # Finish
    if len(files_error) != 0:
        log.warning('Files with a problem: {}'.format(','.join(files_error)))

    log.info('Postprocess data using CDO for test {} finished. \n '
             'Output here : {}'.format(test, ofile_tot))

    # return name of output file
    return (ofile_tot, False)
Esempio n. 12
0
def main(new_exp, results_data_processing, tests, p_stages, p_ref_csv_files,
         ltestsuite, f_vars_to_extract):
    '''
    Perform the requested tests for a new experiment against the
    references stored as csv files.

    :param new_exp: name of the new experiment
    :param results_data_processing: dictionary with one dataframe per
        test containing the processed data of new_exp; a column 'exp'
        holding new_exp is added to each of these dataframes
    :param tests: iterable with the names of the tests
        ('welch', 'fldcor', 'emi' and 'rmse' are handled)
    :param p_stages: directory where the csv file with the result of
        the Welch's test is written
    :param p_ref_csv_files: directory of the reference pool
    :param ltestsuite: if True, utils.exit_if_testresult_is_bad is
        called for every test once all results are available
    :param f_vars_to_extract: csv file defining the variables to analyse
    :return: tuple (df_result, df_ref), two dictionaries with one
        dataframe per test
    '''

    df_exp = {}
    df_ref = {}
    p_csv_files = {}
    testresult_csv = {}
    df_result = {}

    for test in tests:
        log.info('Prepare references for test {}'.format(test))

        test_cfg = get_config_of_current_test(test)

        results_data_processing[test]['exp'] = new_exp

        # list of paths to all csv files
        p_csv_files[test] = glob.glob(
            os.path.join(p_ref_csv_files, test,
                         '{}_*csv'.format(test_cfg.ref_name)))
        if len(p_csv_files[test]) == 0:
            log.error('No reference files found in {}'.format(p_ref_csv_files))

        log.debug('{} reference(s) found for test {}'.format(
            len(p_csv_files[test]), test))

        # create big dataframe containing all reference exps
        df_ref[test] = create_big_df(test_cfg.ref_name,
                                     list_csv_files=p_csv_files[test])

        # Exclude all the non-desired variables (1) var from file, 2) exp)
        full_p_f_vars = os.path.join(paths.p_f_vars_proc, test,
                                     f_vars_to_extract)
        vars_to_analyse = list(
            pd.read_csv(full_p_f_vars, sep=',')['var'].values)
        vars_to_analyse.append('exp')
        try:
            df_ref[test] = df_ref[test][vars_to_analyse]
        except KeyError as e:
            log.warning(e)
            log.error('Variables defined in {} are not contained '
                      'in reference {}'.format(
                          utils.rel_path(f_vars_to_extract),
                          utils.rel_path(p_ref_csv_files)))

        df_exp[test] = results_data_processing[test][vars_to_analyse]

        log.info('References for test {} prepared'.format(test))

        testresult_csv[test] = os.path.join(
            p_stages, 'result_{}_{}.csv'.format(test, new_exp))

        if test == 'welch':
            log.banner('')
            log.banner("Perform Welch's t-test for each variable")
            log.banner('')
            df_result[test] = welch_test(
                df_a=df_ref[test],
                df_b=df_exp[test],
                filename_student_test=testresult_csv[test])
            df_result[test]['p-value [%]'] = df_result[test]['p-value'] * 100.

        elif test == 'fldcor':
            log.banner('')
            log.banner("Perform fldcor test for each variable")
            log.banner('')
            df_result[test] = pattern_correlation(df_exp[test], test_cfg)

        elif test == 'emi':
            log.banner('')
            log.banner("Perform emission test for each variable")
            log.banner('')
            df_result[test] = emissions(df_exp[test], df_ref[test], test_cfg)

        elif test == 'rmse':
            log.banner('')
            log.banner("Perform rmse test for each variable")
            log.banner('')
            df_result[test] = rmse(df_exp[test], test_cfg)

        df_result[test] = sort_level_metric(df_result[test],
                                            test_cfg.metric_threshold,
                                            test_cfg.metric)
        df_result[test] = add_color_df_result(df_result[test],
                                              test_cfg.metric_threshold)

        print_warning_color(df_result[test], test_cfg.metric_threshold,
                            test_cfg.metric)

    # the check needs the results of all tests, therefore it is done
    # after the loop; inside the loop the results of the tests not yet
    # processed are missing in df_result
    if ltestsuite:
        for checked_test in tests:
            checked_cfg = get_config_of_current_test(checked_test)
            utils.exit_if_testresult_is_bad(checked_test,
                                            df_result[checked_test],
                                            checked_cfg.metric_threshold,
                                            checked_cfg.metric)

    return df_result, df_ref