def run_data(directory: str, depth_age_file: str, prefix='KCC',
             depth='depth (m abs)', output=True):
    '''
    Compile one row of mean values per laser run from the raw LA-ICP-MS data.

    Every sub-folder of ``directory`` whose name starts with ``prefix`` is
    searched for folders whose name starts with ``Input``. The input files in
    those folders are sorted into medium resolution (MR) and low resolution
    (LR) by their file name. Each laser run returned by ``load_input`` is
    processed with ``process_laser_data`` and reduced to a single row: the
    run's ``info`` joined column-wise with the column means of its
    ``sample_year_df``.

    :param directory: root directory that holds the core folders
    :param depth_age_file: depth/age file handed to ``process_laser_data``
    :param prefix: only folders starting with this prefix are read
    :param depth: depth column name forwarded to ``readme_laser_file``
        (previously this argument was ignored and 'depth (m abs)' was always
        passed; the default keeps that behaviour)
    :param output: when true, write the two CSV files through ``to_csv`` and
        the two README files into ``<directory>/Output_Files``
    :return: tuple ``(dfMR, dfLR)`` with one row per run; either frame is
        empty when no matching input file was found
    '''
    # File-name endings that mark an input file as medium / low resolution.
    mr_suffixes = ('1', 'MR', '1.txt', 'MR.txt')
    lr_suffixes = ('2', 'LR', '2.txt', 'LR.txt')

    # Rows are collected and concatenated once at the end: DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.
    mr_rows = []
    lr_rows = []

    # NOTE(review): nothing in this function fills df_info, so the info file
    # written below (only when it does not exist yet) is empty -- confirm
    # whether the run info was meant to be collected here.
    df_info = DataFrame()

    for folder in os.listdir(directory):
        core_dir = os.path.join(directory, folder)
        # Skip plain files that merely share the prefix (os.listdir on them
        # would raise NotADirectoryError).
        if not folder.startswith(prefix) or not os.path.isdir(core_dir):
            continue
        for input_folder in sorted(os.listdir(core_dir)):
            input_dir = os.path.join(core_dir, input_folder)
            if not input_folder.startswith('Input') \
                    or not os.path.isdir(input_dir):
                continue
            for file in sorted(os.listdir(input_dir)):
                if not file.startswith('Input'):
                    continue
                if file.startswith('InputFile_1') \
                        or file.endswith(mr_suffixes):
                    rows = mr_rows
                elif file.startswith('InputFile_2') \
                        or file.endswith(lr_suffixes):
                    rows = lr_rows
                else:
                    continue
                for f in load_input(os.path.join(input_dir, file)):
                    dc = FrameClass(process_laser_data(f, depth_age_file))
                    run_mean = dc.sample_year_df.mean().to_frame().transpose()
                    rows.append(pandas.concat([f.info, run_mean], axis=1))

    # pandas.concat raises on an empty list, so fall back to an empty frame.
    dfMR = pandas.concat(mr_rows, ignore_index=True) if mr_rows \
        else DataFrame()
    dfLR = pandas.concat(lr_rows, ignore_index=True) if lr_rows \
        else DataFrame()

    if output:
        # Kept in its own name instead of overwriting the boolean parameter.
        label = 'Raw_by_Run'
        mr_csv = 'LA-ICP-MS_{}_MR.csv'.format(label)
        lr_csv = 'LA-ICP-MS_{}_LR.csv'.format(label)
        out_dir = os.path.join(directory, 'Output_Files')
        today = str(datetime.date.today())

        to_csv(directory, dfMR, mr_csv, False)
        to_csv(directory, dfLR, lr_csv, False)

        info_file = 'full_core_information.csv'
        # Never overwrite an info file that is already present.
        if not os.path.isfile(os.path.join(out_dir, info_file)):
            to_csv(directory, df_info, info_file, False)

        readmeMR = readme_laser_file(laser_template, directory, prefix,
                                     depth_age_file, FrameClass(dfMR),
                                     'Medium', today, info_file, mr_csv,
                                     label, depth)
        readmeLR = readme_laser_file(laser_template, directory, prefix,
                                     depth_age_file, FrameClass(dfLR),
                                     'Low', today, info_file, lr_csv,
                                     label, depth)
        write_readmefile_to_txtfile(
            readmeMR,
            os.path.join(
                out_dir,
                '00README_{}_Medium_Resolution.txt'.format(label)))
        write_readmefile_to_txtfile(
            readmeLR,
            os.path.join(
                out_dir,
                '00README_{}_Low_Resolution.txt'.format(label)))
    return dfMR, dfLR
def resample(by: str, f: str, stat: List[str] = ['mean'], inc_amt: int = 1,
             by_name: str = None, output=True):
    '''
    Resample the samples of a CSV file by year or by depth.

    Example (from the command line)::

        $ PYTHONPATH=. python climatechange/process_data.py -year_name ../test/csv_files/small.csv

    :param by: resampling axis; one of 'year', 'Year', 'y', 'Y' or
        'depth', 'Depth', 'd', 'D'
    :param f: path of the CSV file, loaded through ``DataClass``
    :param stat: statistic name or list of statistic names handed to
        ``by_years`` / ``by_depths``; a falsy value (``None``, empty list)
        produces a file named ``..._stats.csv``
    :param inc_amt: increment handed to ``by_years`` / ``by_depths``
    :param by_name: optional header string; when given it is parsed with
        ``process_header_str`` and replaces the year/depth headers detected
        in the file
    :param output: when true, write one CSV and one README per header into
        the directory of ``f``
    :return: list with one resampled result per header
    :raises ValueError: if ``by`` is not one of the accepted values
    '''
    # Validate the axis before loading the file so that a typo fails fast
    # with a clear message instead of an UnboundLocalError further down.
    if by in ('year', 'Year', 'y', 'Y'):
        by_year = True
    elif by in ('depth', 'Depth', 'd', 'D'):
        by_year = False
    else:
        raise ValueError(
            "by must be one of 'year', 'Year', 'y', 'Y', 'depth', 'Depth', "
            "'d', 'D'; got {!r}".format(by))

    # The default is a list literal and None already means "all statistics",
    # so the default cannot be swapped for None; copy instead so a callee
    # can never mutate the shared default (or the caller's list).
    if isinstance(stat, list):
        stat = list(stat)

    logging.info("Creating pdf for %s", f)
    dc = DataClass(f)

    if by_name:
        headers = process_header_str(by_name)
    elif by_year:
        headers = dc.year_headers
    else:
        headers = dc.depth_headers
    resampler = by_years if by_year else by_depths

    all_files = []
    dfs = []
    for h in headers:
        df = resampler(dc, h, inc_amt, stat)
        dfs.append(df)

        if not stat:
            suffix = 'stats'
        elif isinstance(stat, str):
            suffix = stat
        else:
            suffix = '_'.join(stat)
        file = '{}_resample_by_{}_{}_{}.csv'.format(
            dc.base, inc_amt, h.label, suffix)
        all_files.append(file)

        if output:
            to_csv(dc.dirname, df, file)
            # The README lists every file written so far in this call.
            readme = readme_output_file(resample_template, dc,
                                        str(datetime.date.today()), inc_amt,
                                        by, stat, all_files)
            write_readmefile_to_txtfile(
                readme,
                os.path.join(
                    dc.dirname,
                    '00README_resample_{}_{}_{}_resolution.txt'.format(
                        h.label, inc_amt, by)))
    return dfs
def raw_data(directory, depth_age_file, prefix='KCC', output=True):
    '''
    Compile all raw LA-ICP-MS data below a directory into two data frames.

    Every sub-folder of ``directory`` whose name starts with ``prefix`` is
    searched for folders whose name starts with ``Input``. The input files in
    those folders are sorted into medium resolution (MR) and low resolution
    (LR) by their file name, each laser run returned by ``load_input`` is
    processed with ``process_laser_data``, and the results are stacked per
    resolution and indexed by the 'depth (m abs)' column.

    :param directory: root directory that holds the core folders
    :param depth_age_file: depth/age file handed to ``process_laser_data``
    :param prefix: only folders starting with this prefix are read
    :param output: when true, write the raw CSV files and the run
        information file through ``to_csv`` and the README files into
        ``<directory>/Output_Files``
    :return: tuple ``(dfMR, dfLR)``; a frame is empty (and not indexed by
        depth) when no input file of that resolution was found
    '''
    # File-name endings that mark an input file as medium / low resolution.
    mr_suffixes = ('1', 'MR', '1.txt', 'MR.txt')
    lr_suffixes = ('2', 'LR', '2.txt', 'LR.txt')

    # Frames are collected and concatenated once at the end: DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.
    mr_frames = []
    lr_frames = []
    info_rows = []

    for folder in os.listdir(directory):
        core_dir = os.path.join(directory, folder)
        # Skip plain files that merely share the prefix (os.listdir on them
        # would raise NotADirectoryError).
        if not folder.startswith(prefix) or not os.path.isdir(core_dir):
            continue
        for input_folder in sorted(os.listdir(core_dir)):
            input_dir = os.path.join(core_dir, input_folder)
            if not input_folder.startswith('Input') \
                    or not os.path.isdir(input_dir):
                continue
            for file in sorted(os.listdir(input_dir)):
                if not file.startswith('Input'):
                    continue
                is_mr = (file.startswith('InputFile_1')
                         or file.endswith(mr_suffixes))
                is_lr = (file.startswith('InputFile_2')
                         or file.endswith(lr_suffixes))
                if not is_mr and not is_lr:
                    continue
                for f in load_input(os.path.join(input_dir, file)):
                    if is_mr:
                        # Run information is only recorded for MR files.
                        info = f.info
                        # presumably f.info is a one-row DataFrame; a Series
                        # or dict is turned into one row, as append() did.
                        if not isinstance(info, DataFrame):
                            info = DataFrame([info])
                        info_rows.append(info)
                        mr_frames.append(
                            process_laser_data(f, depth_age_file))
                    else:
                        lr_frames.append(
                            process_laser_data(f, depth_age_file))

    # pandas.concat raises on an empty list, so fall back to empty frames.
    df = pandas.concat(info_rows, ignore_index=True) if info_rows \
        else DataFrame()
    dfMR = pandas.concat(mr_frames, ignore_index=True) if mr_frames \
        else DataFrame()
    dfLR = pandas.concat(lr_frames, ignore_index=True) if lr_frames \
        else DataFrame()

    # Only index by depth when data was loaded: an empty frame has no
    # 'depth (m abs)' column and set_index would raise KeyError, e.g. when a
    # core has medium-resolution files only.
    if mr_frames:
        dfMR = dfMR.set_index(['depth (m abs)'])
    if lr_frames:
        dfLR = dfLR.set_index(['depth (m abs)'])

    if output:
        # NOTE(review): the files below are written even for an empty frame;
        # how to_csv / FrameClass / readme_laser_file treat an empty frame
        # cannot be seen from here -- confirm.
        out_dir = os.path.join(directory, 'Output_Files')
        today = str(datetime.date.today())

        to_csv(directory, dfMR, 'LA-ICP-MS_raw_MR.csv')
        to_csv(directory, dfLR, 'LA-ICP-MS_raw_LR.csv')
        info_file = 'full_core_information.csv'
        to_csv(directory, df, info_file, False)

        readmeMR = readme_laser_file(laser_template, directory, prefix,
                                     depth_age_file, FrameClass(dfMR),
                                     'Medium', today, info_file,
                                     'LA-ICP-MS_raw_MR.csv', 'Raw')
        readmeLR = readme_laser_file(laser_template, directory, prefix,
                                     depth_age_file, FrameClass(dfLR),
                                     'Low', today, info_file,
                                     'LA-ICP-MS_raw_LR.csv', 'Raw')
        write_readmefile_to_txtfile(
            readmeMR,
            os.path.join(out_dir, '00README_Raw_Medium_Resolution.txt'))
        write_readmefile_to_txtfile(
            readmeLR,
            os.path.join(out_dir, '00README_Raw_Low_Resolution.txt'))
    return dfMR, dfLR
def resample_by(filename: str, resample_by: str, stat: List[str] = None,
                depth: str = None, output=True):
    '''
    Resample the samples of one file onto the depth intervals of another.

    Both files are loaded through ``DataClass``. For every matching header
    the rows of ``filename`` are grouped into the half-open intervals
    ``[lower, upper)`` formed by consecutive index values of ``resample_by``
    (restricted to the range covered by ``filename``), and each sample
    column is summarised per interval with ``Series.describe()``.

    :param filename: CSV file with the data to be resampled (the finer
        data set)
    :param resample_by: CSV file whose values of the matching header define
        the interval boundaries
    :param stat: statistic name or list of names to keep from the
        ``describe()`` output; all statistics are kept when falsy
    :param depth: optional header string parsed with ``process_header_str``;
        when omitted the headers come from ``find_match(dc, dc_by)``
    :param output: when true, write one CSV and one README per header into
        the directory of ``filename``
    :return: list with one data frame per header, indexed by the lower
        bound of each interval; ``[DataFrame()]`` as soon as a header has no
        interval boundaries inside the range of ``filename``
    '''
    dc = DataClass(filename)
    dc_by = DataClass(resample_by)
    headers = process_header_str(depth) if depth else find_match(dc, dc_by)

    headers_by = []
    all_files = []
    for h in headers:
        hr = dc.sample_df.set_index(dc.df[h.name])
        lr = dc_by.sample_df.set_index(dc_by.df[h.name])
        # Keep only the boundaries that fall inside the finer data's range.
        lr = lr[(lr.index >= min(hr.index)) & (lr.index <= max(hr.index))]
        if lr.empty:
            # NOTE(review): this discards the results of earlier headers,
            # exactly as the original early return did.
            return [DataFrame()]

        # The interval windows do not depend on the sample column, so they
        # are cut once per header instead of once per sample.
        edges = lr.index
        windows = [hr[(hr.index >= lower) & (hr.index < upper)]
                   for lower, upper in zip(edges[:-1], edges[1:])]

        if not stat:
            file = '{}_resample_by_{}_{}.csv'.format(
                dc.base, dc_by.base, h.label)
        elif isinstance(stat, str):
            file = '{}_resampled_by_{}_{}_{}.csv'.format(
                dc.base, dc_by.base, h.label, stat)
        else:
            file = '{}_resampled_by_{}_{}_{}.csv'.format(
                dc.base, dc_by.base, h.label, '_'.join(stat))

        stat_dict = []
        for s in dc.sample_headers:
            # One describe() row per interval, built in a single step
            # (DataFrame.append was removed in pandas 2.0).
            rows = [window[s.name].describe() for window in windows]
            df = DataFrame(rows).reset_index(drop=True)
            if stat:
                df = df[stat]
            if isinstance(df, DataFrame):
                df.columns = [s.label + '_' + col for col in df]
            else:
                # A single statistic name selects a Series: rename it.
                df.name = s.label + '_' + df.name
            stat_dict.append(df)

        all_files.append(file)
        stat_df = pandas.concat(stat_dict, axis=1)
        stat_df = stat_df.set_index([lr.index[:-1]])
        stat_df.index.name = h.label
        if output:
            to_csv(dc.dirname, stat_df, file)
            # The README lists every file produced so far in this call.
            readme = readme_output_file(resample_template, dc,
                                        str(datetime.date.today()),
                                        dc_by.base, 'depth', stat, all_files)
            write_readmefile_to_txtfile(
                readme,
                os.path.join(
                    dc.dirname,
                    '00README_resample_{}_by_{}.txt'.format(
                        h.label, dc_by.base)))
        headers_by.append(stat_df)
    return headers_by