def run_data(directory: str,
             depth_age_file: str,
             prefix='KCC',
             depth='depth (m abs)',
             output=True):
    '''
    Compile one row of per-run means for every LA-ICP-MS laser run.

    Walks ``directory`` for sub-folders whose name starts with ``prefix``,
    looks in each of their ``Input*`` sub-folders for input files, and for
    every laser file returned by ``load_input`` stores the run information
    (``f.info``) side by side with the column means of
    ``FrameClass(process_laser_data(f, depth_age_file)).sample_year_df``.

    Input files are classified by name:

    * medium resolution (MR): ``InputFile_1*``, or ``Input*`` ending in
      ``1``, ``MR``, ``1.txt`` or ``MR.txt``
    * low resolution (LR): ``InputFile_2*``, or ``Input*`` ending in
      ``2``, ``LR``, ``2.txt`` or ``LR.txt``

    Entries that match a name pattern but are not directories are skipped,
    and folders are visited in sorted order so the row order is reproducible.

    :param directory: root directory that holds the core folders
    :param depth_age_file: depth/age file handed to ``process_laser_data``
    :param prefix: only folders whose name starts with this are read
    :param depth: depth column label forwarded to ``readme_laser_file``
    :param output: when true, write the two CSV files, the core information
        file (only if it does not exist yet) and the two README files
    :return: tuple ``(dfMR, dfLR)`` with one row per laser run; a frame is
        empty when no matching input file was found
    '''
    mr_suffixes = ('1', 'MR', '1.txt', 'MR.txt')
    lr_suffixes = ('2', 'LR', '2.txt', 'LR.txt')
    mr_rows = []
    lr_rows = []
    info_rows = []

    for folder in sorted(os.listdir(directory)):
        folder_path = os.path.join(directory, folder)
        # A stray file that happens to share the prefix must not abort the walk.
        if not folder.startswith(prefix) or not os.path.isdir(folder_path):
            continue

        for input_folder in sorted(os.listdir(folder_path)):
            input_path = os.path.join(folder_path, input_folder)
            if (not input_folder.startswith('Input')
                    or not os.path.isdir(input_path)):
                continue

            for file in sorted(os.listdir(input_path)):
                is_input = file.startswith('Input')
                if file.startswith('InputFile_1') or (
                        is_input and file.endswith(mr_suffixes)):
                    rows, is_mr = mr_rows, True
                elif file.startswith('InputFile_2') or (
                        is_input and file.endswith(lr_suffixes)):
                    rows, is_mr = lr_rows, False
                else:
                    continue

                for f in load_input(os.path.join(input_path, file)):
                    dc = FrameClass(process_laser_data(f, depth_age_file))
                    means = dc.sample_year_df.mean().to_frame().transpose()
                    rows.append(pandas.concat([f.info, means], axis=1))
                    if is_mr:
                        # Same source as raw_data uses for the core
                        # information file (medium-resolution runs only).
                        info_rows.append(f.info)

    # Rows are gathered in lists and concatenated once: DataFrame.append was
    # removed in pandas 2.0, and pandas.concat rejects an empty list.
    dfMR = (pandas.concat(mr_rows, ignore_index=True)
            if mr_rows else DataFrame())
    dfLR = (pandas.concat(lr_rows, ignore_index=True)
            if lr_rows else DataFrame())
    df_info = (pandas.concat(info_rows, ignore_index=True)
               if info_rows else DataFrame())

    if output:
        label = 'Raw_by_Run'
        mr_csv = 'LA-ICP-MS_{}_MR.csv'.format(label)
        lr_csv = 'LA-ICP-MS_{}_LR.csv'.format(label)
        to_csv(directory, dfMR, mr_csv, False)
        to_csv(directory, dfLR, lr_csv, False)

        out_dir = os.path.join(directory, 'Output_Files')
        info_file = 'full_core_information.csv'
        # Never overwrite an information file that is already present.
        if not os.path.isfile(os.path.join(out_dir, info_file)):
            to_csv(directory, df_info, info_file, False)

        today = str(datetime.date.today())
        readmeMR = readme_laser_file(laser_template, directory, prefix,
                                     depth_age_file, FrameClass(dfMR),
                                     'Medium', today, info_file, mr_csv,
                                     label, depth)
        readmeLR = readme_laser_file(laser_template, directory, prefix,
                                     depth_age_file, FrameClass(dfLR),
                                     'Low', today, info_file, lr_csv,
                                     label, depth)

        write_readmefile_to_txtfile(
            readmeMR,
            os.path.join(out_dir,
                         '00README_{}_Medium_Resolution.txt'.format(label)))
        write_readmefile_to_txtfile(
            readmeLR,
            os.path.join(out_dir,
                         '00README_{}_Low_Resolution.txt'.format(label)))

    return dfMR, dfLR
def resample(by: str,
             f: str,
             stat=['mean'],
             inc_amt: int = 1,
             by_name: str = None,
             output=True):
    '''
    Resample the samples of a CSV file by year or by depth increments.

    For every selected header the data is resampled with ``by_years`` or
    ``by_depths`` and, when ``output`` is true, written to a CSV file next
    to a README text file in the directory of the input file.

    $ PYTHONPATH=. python climatechange/process_data.py -year_name ../test/csv_files/small.csv

    :param by: resampling axis; one of ``year``/``Year``/``y``/``Y`` or
        ``depth``/``Depth``/``d``/``D``
    :param f: path of the CSV file to load through ``DataClass``
    :param stat: statistic name, list of statistic names, or a falsy value;
        forwarded to the resampler and used to build the output file name
        (a falsy value yields a ``..._stats.csv`` file name)
    :param inc_amt: increment size forwarded to the resampler
    :param by_name: optional header specification parsed with
        ``process_header_str``; when omitted, the year or depth headers
        detected by ``DataClass`` are used
    :param output: when true, write the CSV and README files
    :return: list with one resampled data frame per header
    :raises ValueError: if ``by`` is not one of the accepted values
    '''
    by_year = by in ('year', 'Year', 'y', 'Y')
    if not by_year and by not in ('depth', 'Depth', 'd', 'D'):
        # Previously this fell through and failed later on unbound locals.
        raise ValueError(
            "by must be one of 'year', 'Year', 'y', 'Y', 'depth', 'Depth', "
            "'d' or 'D', got {!r}".format(by))

    if stat is not None and not isinstance(stat, str):
        # Work on a private copy so the shared default list can never be
        # modified through the helpers it is handed to.
        stat = list(stat)

    logging.info("Creating pdf for %s", f)
    dc = DataClass(f)

    if by_name:
        headers = process_header_str(by_name)
    elif by_year:
        headers = dc.year_headers
    else:
        headers = dc.depth_headers

    if not stat:
        stat_label = 'stats'
    elif isinstance(stat, str):
        stat_label = stat
    else:
        stat_label = '_'.join(stat)

    all_files = []
    dfs = []
    for h in headers:
        if by_year:
            df = by_years(dc, h, inc_amt, stat)
        else:
            df = by_depths(dc, h, inc_amt, stat)
        dfs.append(df)

        file = '{}_resample_by_{}_{}_{}.csv'.format(dc.base, inc_amt,
                                                    h.label, stat_label)
        all_files.append(file)

        if output:
            to_csv(dc.dirname, df, file)

            # The README receives every file produced so far in this call.
            readme = readme_output_file(resample_template, dc,
                                        str(datetime.date.today()), inc_amt,
                                        by, stat, all_files)
            write_readmefile_to_txtfile(
                readme,
                os.path.join(
                    dc.dirname,
                    '00README_resample_{}_{}_{}_resolution.txt'.format(
                        h.label, inc_amt, by)))

    return dfs
def raw_data(directory, depth_age_file, prefix='KCC', output=True):
    '''
    Compile all raw LA-ICP-MS data found below ``directory``.

    Walks ``directory`` for sub-folders whose name starts with ``prefix``,
    looks in each of their ``Input*`` sub-folders for input files, and
    stacks the result of ``process_laser_data(f, depth_age_file)`` for every
    laser file returned by ``load_input``.

    Input files are classified by name:

    * medium resolution (MR): ``InputFile_1*``, or ``Input*`` ending in
      ``1``, ``MR``, ``1.txt`` or ``MR.txt``
    * low resolution (LR): ``InputFile_2*``, or ``Input*`` ending in
      ``2``, ``LR``, ``2.txt`` or ``LR.txt``

    Entries that match a name pattern but are not directories are skipped,
    and folders are visited in sorted order so the row order is reproducible.
    The core information table is built from the medium-resolution runs only.

    :param directory: root directory that holds the core folders
    :param depth_age_file: depth/age file handed to ``process_laser_data``
    :param prefix: only folders whose name starts with this are read
    :param output: when true, write the two raw CSV files, the core
        information file and the two README files
    :return: tuple ``(dfMR, dfLR)`` indexed by ``depth (m abs)``; a frame is
        left empty (and un-indexed) when no matching input file was found
    '''
    depth_column = 'depth (m abs)'
    mr_suffixes = ('1', 'MR', '1.txt', 'MR.txt')
    lr_suffixes = ('2', 'LR', '2.txt', 'LR.txt')
    mr_frames = []
    lr_frames = []
    info_frames = []

    for folder in sorted(os.listdir(directory)):
        folder_path = os.path.join(directory, folder)
        # A stray file that happens to share the prefix must not abort the walk.
        if not folder.startswith(prefix) or not os.path.isdir(folder_path):
            continue

        for input_folder in sorted(os.listdir(folder_path)):
            input_path = os.path.join(folder_path, input_folder)
            if (not input_folder.startswith('Input')
                    or not os.path.isdir(input_path)):
                continue

            for file in sorted(os.listdir(input_path)):
                is_input = file.startswith('Input')
                if file.startswith('InputFile_1') or (
                        is_input and file.endswith(mr_suffixes)):
                    for f in load_input(os.path.join(input_path, file)):
                        info_frames.append(f.info)
                        mr_frames.append(
                            process_laser_data(f, depth_age_file))
                elif file.startswith('InputFile_2') or (
                        is_input and file.endswith(lr_suffixes)):
                    for f in load_input(os.path.join(input_path, file)):
                        lr_frames.append(
                            process_laser_data(f, depth_age_file))

    # Frames are gathered in lists and concatenated once: DataFrame.append
    # was removed in pandas 2.0, and pandas.concat rejects an empty list.
    df = (pandas.concat(info_frames, ignore_index=True)
          if info_frames else DataFrame())

    # Only index by depth when there is data; an empty frame has no depth
    # column and set_index would raise KeyError.
    if mr_frames:
        dfMR = pandas.concat(mr_frames,
                             ignore_index=True).set_index([depth_column])
    else:
        dfMR = DataFrame()
    if lr_frames:
        dfLR = pandas.concat(lr_frames,
                             ignore_index=True).set_index([depth_column])
    else:
        dfLR = DataFrame()

    if output:
        mr_csv = 'LA-ICP-MS_raw_MR.csv'
        lr_csv = 'LA-ICP-MS_raw_LR.csv'
        info_file = 'full_core_information.csv'
        to_csv(directory, dfMR, mr_csv)
        to_csv(directory, dfLR, lr_csv)
        to_csv(directory, df, info_file, False)

        today = str(datetime.date.today())
        readmeMR = readme_laser_file(laser_template, directory, prefix,
                                     depth_age_file, FrameClass(dfMR),
                                     'Medium', today, info_file, mr_csv,
                                     'Raw')
        readmeLR = readme_laser_file(laser_template, directory, prefix,
                                     depth_age_file, FrameClass(dfLR),
                                     'Low', today, info_file, lr_csv, 'Raw')

        out_dir = os.path.join(directory, 'Output_Files')
        write_readmefile_to_txtfile(
            readmeMR,
            os.path.join(out_dir, '00README_Raw_Medium_Resolution.txt'))
        write_readmefile_to_txtfile(
            readmeLR,
            os.path.join(out_dir, '00README_Raw_Low_Resolution.txt'))

    return dfMR, dfLR
def resample_by(filename: str,
                resample_by: str,
                stat: List[str] = None,
                depth: str = None,
                output=True):
    '''
    Resample the samples of one file onto the intervals of another file.

    Both files are loaded through ``DataClass``. For every selected header
    the values of that header in ``resample_by`` (restricted to the range
    covered by ``filename``) define consecutive half-open intervals
    ``[lower, upper)``. The rows of ``filename`` falling in each interval
    are summarised per sample with ``Series.describe``.

    :param filename: file with the data to be summarised
    :param resample_by: file whose header values define the intervals
    :param stat: name or list of names of the ``describe`` statistics to
        keep; when falsy, all statistics are kept
    :param depth: optional header specification parsed with
        ``process_header_str``; when omitted, ``find_match(dc, dc_by)``
        selects the headers
    :param output: when true, write one CSV and one README per header into
        the directory of ``filename``
    :return: list with one statistics data frame per header, indexed by the
        lower bound of each interval. If, for some header, no interval
        value lies within the range of ``filename``, a list holding a
        single empty data frame is returned immediately.
    '''
    dc = DataClass(filename)
    dc_by = DataClass(resample_by)
    headers = process_header_str(depth) if depth else find_match(dc, dc_by)

    headers_by = []
    all_files = []
    for h in headers:
        hr = dc.sample_df.set_index(dc.df[h.name])
        lr = dc_by.sample_df.set_index(dc_by.df[h.name])

        lr = lr[(lr.index >= hr.index.min()) & (lr.index <= hr.index.max())]
        if lr.empty:
            return [DataFrame()]

        # Slice the high-resolution data once per interval; the slices are
        # shared by all samples.
        bins = [
            hr[(hr.index >= lower) & (hr.index < upper)]
            for lower, upper in zip(lr.index[:-1], lr.index[1:])
        ]

        stat_frames = []
        for s in dc.sample_headers:
            # One row of describe() statistics per interval.
            df = DataFrame([chunk[s.name].describe()
                            for chunk in bins]).reset_index(drop=True)
            if stat:
                df = df[stat]
                if isinstance(stat, str):
                    # A single statistic selects a Series: prefix its name.
                    df.name = s.label + '_' + stat
                else:
                    df.columns = [s.label + '_' + col for col in df]
            else:
                df.columns = [s.label + '_' + col for col in df]
            stat_frames.append(df)

        if not stat:
            file = '{}_resample_by_{}_{}.csv'.format(dc.base, dc_by.base,
                                                     h.label)
        elif isinstance(stat, str):
            file = '{}_resampled_by_{}_{}_{}.csv'.format(
                dc.base, dc_by.base, h.label, stat)
        else:
            file = '{}_resampled_by_{}_{}_{}.csv'.format(
                dc.base, dc_by.base, h.label, '_'.join(stat))
        all_files.append(file)

        stat_df = pandas.concat(stat_frames, axis=1)
        stat_df = stat_df.set_index([lr.index[:-1]])
        stat_df.index.name = h.label

        if output:
            to_csv(dc.dirname, stat_df, file)

            # The README receives every file produced so far in this call.
            readme = readme_output_file(resample_template, dc,
                                        str(datetime.date.today()),
                                        dc_by.base, 'depth', stat, all_files)
            write_readmefile_to_txtfile(
                readme,
                os.path.join(
                    dc.dirname, '00README_resample_{}_by_{}.txt'.format(
                        h.label, dc_by.base)))

        # Collect the result of every header, not only the last one.
        headers_by.append(stat_df)

    return headers_by