async def match_runs_to_lines(logger):
    """
    Match unpaired PaLines to GcRuns where possible.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db
        from summit_methane import GcRun, PaLine, match_lines_to_runs, Base
    except ImportError as e:
        send_processor_email(PROC, exception=e)
        logger.error('ImportError occurred in match_runs_to_lines()')
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in match_runs_to_lines()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running match_runs_to_lines()')

        unmatched_lines = session.query(PaLine).filter(PaLine.run == None).all()
        unmatched_runs = session.query(GcRun).filter(GcRun.pa_line_id == None).all()
        # married_runs_count = session.query(GcRun).filter(GcRun.status == 'married').count()

        lines, runs, count = match_lines_to_runs(unmatched_lines, unmatched_runs)

        session.commit()

        if count:
            logger.info(f'{count} GcRuns matched with PaLines.')
            return True
        else:
            logger.info('No new GcRun-PaLine pairs matched.')
            return False

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in match_runs_to_lines()')
        send_processor_email(PROC, exception=e)
        return False
async def add_one_standard(logger):
    """
    Add a single standard (the current working one), so that quantifications are possible. VERY TEMPORARY.

    :param logger: logger, to log events to
    :return: Boolean, True if successful
    """
    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db
        from summit_methane import Standard, Base
    except ImportError as e:
        logger.error('ImportError occurred in add_one_standard()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in add_one_standard()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        current_standard_dates = [S.date_st for S in session.query(Standard).all()]

        my_only_standard = Standard('ws_2019', 2067.16, datetime(2019, 1, 1), datetime(2019, 12, 31))

        if my_only_standard.date_st not in current_standard_dates:
            session.merge(my_only_standard)
            session.commit()

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in add_one_standard()')
        send_processor_email(PROC, exception=e)
        return False
def get_last_processor_date(processor, logger):
    """
    Retrieves the latest high-level date for the specified processor. It looks at GcRuns for VOCs (complete runs),
    5-second Datums for the Picarro, and matched GcRuns for methane.

    :param processor: str, in ['voc', 'picarro', 'methane']
    :param logger: logging logger
    :return: datetime, date of last data point for the specified processor
    """
    from summit_core import connect_to_db, TempDir

    if processor == 'voc':
        from summit_core import voc_dir as directory
        from summit_voc import GcRun as DataType
    elif processor == 'picarro':
        from summit_core import picarro_dir as directory
        from summit_picarro import Datum as DataType
    elif processor == 'methane':
        from summit_core import methane_dir as directory
        from summit_methane import GcRun as DataType
    else:
        logger.error('Invalid processor supplied to get_last_processor_date()')
        assert False, 'Invalid processor supplied to get_last_processor_date()'

    with TempDir(directory):
        engine, session = connect_to_db(f'sqlite:///summit_{processor}.sqlite', directory)
        val = session.query(DataType.date).order_by(DataType.date.desc()).first()

        if val:
            val = val[0]

    session.close()
    engine.dispose()
    return val
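# Usage sketch (not part of the original processors; the helper name below is hypothetical):
# compare the newest data dates across processors to spot one that has fallen behind.
def compare_processor_dates_sketch(logger):
    """Hypothetical example of calling get_last_processor_date() for each processor."""
    last_dates = {}
    for proc in ['voc', 'picarro', 'methane']:
        last_dates[proc] = get_last_processor_date(proc, logger)  # None if that database is empty

    for proc, date in last_dates.items():
        logger.info(f'Last data point for {proc}: {date}')

    return last_dates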
ax.set_ylabel(f'Mixing Ratio ({unit_string})', fontsize=20) ax.set_title(f'{comp_list}', fontsize=24, y=1.02) ax.legend() f1.subplots_adjust(bottom=.20) plot_name = f'{fn_list}_last_week.png' f1.savefig(plot_name, dpi=150) plt.close(f1) return plot_name # wanted to return the figure to add to it with methane # Connect to the Picarro DB rundir = r'C:\Users\ARL\Desktop\Testing DB' # location of DB engine, session = connect_to_db('sqlite:///Jsummit_picarro.sqlite', rundir) # Create eng & sess Base.metadata.create_all(engine) # Create base date_limits, major_ticks, minor_ticks = custom_create_daily_ticks(6) all_data = ( session.query(Datum.date, Datum.ch4) # get date and methane .filter((Datum.mpv_position == 0.0) | (Datum.mpv_position == 1.0)) # filter for not cal events .filter((Datum.instrument_status == 963), (Datum.alarm_status == 0)) # filter out bad data .filter(Datum.date >= date_limits['left']) # just get certain dates .all()) # Gather the Picarro Methane Data picarro_dates = [] picarro_ch4 = []
import pandas as pd from datetime import datetime from summit_methane import GcRun, add_formulas_and_format_sheet from summit_core import connect_to_db, append_df_to_excel from summit_core import methane_dir, data_file_paths methane_sheet = data_file_paths.get('methane_sheet', None) if not methane_sheet: pass # TODO: ERROR! engine, session = connect_to_db('sqlite:///summit_methane.sqlite', methane_dir) runs_for_this_year = session.query(GcRun).filter( GcRun.date.between(datetime(2019, 1, 1), datetime.now())).all() col_list = [ 'date', 'filename', 'peak1', 'peak2', 'mr1', 'mr2', 'run_median', 'run_rsd', 'std_median', 'std_rsd' ] master_df = pd.DataFrame(index=None, columns=col_list) for run in runs_for_this_year: df = pd.DataFrame(index=range(1, 6), columns=col_list) df['date'][1] = run.date df['filename'][1] = run.logfile.name # The below can be turned on to copy peak information from the automatic integrations into the spreadsheet
async def update_excel_sheet(logger):
    """
    This checks for new GcRuns since it was last run and creates a DataFrame containing run information
    that's appended to a spreadsheet on the Z-drive. This sheet is filled out by whoever does the manual
    integration, and is later read by TODO - I haven't written that yet
    to bring the updated peak areas back into the database and re-calculate mixing ratios.

    :param logger: logging logger for info and failures
    :return: bool, True if ran, False if errored
    """
    logger.info('Running update_excel_sheet()')

    try:
        import pandas as pd
        from datetime import datetime

        from summit_core import methane_dir as rundir
        from summit_errors import send_processor_warning
        from summit_methane import GcRun, Base, add_formulas_and_format_sheet
        from summit_core import Config, connect_to_db, append_df_to_excel
        from summit_core import methane_dir, core_dir, data_file_paths

        methane_sheet = data_file_paths.get('methane_sheet', None)

        if not methane_sheet:
            logger.error('Filepath for the methane integration sheet could not be retrieved.')
            send_processor_warning(PROC, 'Filepath Error',
                                   '''The methane integration sheet filepath could not be retrieved.
                                   It should be listed as "methane_sheet" in file_locations.json in the core folder.''')
            return False

    except ImportError as e:
        logger.error('ImportError occurred in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        methane_sheet_config = core_session.query(Config).filter(Config.processor == 'methane_sheet').one_or_none()

        if not methane_sheet_config:
            methane_sheet_config = Config(processor='methane_sheet')
            # use all default values except processor on init
            core_session.add(methane_sheet_config)
            core_session.commit()

    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the core database in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        most_recent_gcrun = session.query(GcRun.date).order_by(GcRun.date.desc()).first()

        if not most_recent_gcrun:
            most_recent_gcrun = datetime(1900, 1, 1)  # default to a safely historic date
        else:
            most_recent_gcrun = most_recent_gcrun.date  # get date from tuple response

        # object list of all the runs past the most recent date
        new_runs = session.query(GcRun).filter(GcRun.date > methane_sheet_config.last_data_date).all()

        if new_runs:
            col_list = ['date', 'filename', 'peak1', 'peak2', 'mr1', 'mr2',
                        'run_median', 'run_rsd', 'std_median', 'std_rsd']  # list of all columns needed in the dataframe

            master_df = pd.DataFrame(index=None, columns=col_list)  # frame an empty df for new run data

            for run in new_runs:
                df = pd.DataFrame(index=range(1, 6), columns=col_list)  # create a five-row block to add later
                df['date'][1] = run.date
                df['filename'][1] = run.logfile.name  # add date and filename for this block

                # The below can copy peak information from the automatic integrations into the spreadsheet
                # peaks1 = [sample.peak for sample in run.samples if sample.sample_num in [0, 2, 4, 6, 8]]
                # peaks2 = [sample.peak for sample in run.samples if sample.sample_num in [1, 3, 5, 7, 9]]
                # df.loc[0:5, 'peak1'] = [(peak.pa if peak else None) for peak in peaks1]
                # df.loc[0:5, 'peak2'] = [(peak.pa if peak else None) for peak in peaks2]

                master_df = master_df.append(df)  # append block to all new ones so far

            # TODO: Anything touching sheets needs to be carefully made to catch inaccessible files ######################
            append_df_to_excel(methane_sheet, master_df, **{'index': False})  # add all new lines and save sheet
            add_formulas_and_format_sheet(methane_sheet)  # open sheet and add formulas where non-existent, format cols

            logger.info('New GcRuns added to the automated integration spreadsheet.')

            methane_sheet_config.last_data_date = most_recent_gcrun
        else:
            logger.info('No new GcRuns found to add to the automated integration spreadsheet.')

        core_session.merge(methane_sheet_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False
async def dual_plot_methane(logger): """ Connects to both the methane [gc] and picarro databases to create an overlayed plot of both data. :param logger: logger, to log events to :return: Boolean, True if it ran without error and created data, False if not """ PROC = 'Methane DualPlotter' try: from pathlib import Path from summit_core import core_dir, Config from summit_core import methane_dir from summit_core import picarro_dir from summit_core import connect_to_db, create_daily_ticks, TempDir, Plot, add_or_ignore_plot from summit_picarro import Datum from summit_methane import Base, GcRun, summit_methane_plot from summit_picarro import Base as PicarroBase remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots' except ImportError as e: logger.error('ImportError occurred in dual_plot_methane()') send_processor_email(PROC, exception=e) return False try: gc_engine, gc_session = connect_to_db( 'sqlite:///summit_methane.sqlite', methane_dir) Base.metadata.create_all(gc_engine) picarro_engine, picarro_session = connect_to_db( 'sqlite:///summit_picarro.sqlite', picarro_dir) PicarroBase.metadata.create_all(picarro_engine) except Exception as e: logger.error( f'Exception {e.args} prevented connection to the database in dual_plot_methane()' ) send_processor_email(PROC, exception=e) return False try: core_engine, core_session = connect_to_db( 'sqlite:///summit_core.sqlite', core_dir) Plot.__table__.create(core_engine, checkfirst=True) Config.__table__.create(core_engine, checkfirst=True) twoplot_config = core_session.query(Config).filter( Config.processor == PROC).one_or_none() if not twoplot_config: twoplot_config = Config( processor=PROC ) # use all default values except processor on init core_session.add(twoplot_config) core_session.commit() except Exception as e: logger.error( f'Error {e.args} prevented connecting to the core database in plot_new_data()' ) send_processor_email(PROC, exception=e) return False try: logger.info('Running dual_plot_methane()') newest_picarro_data_point = (picarro_session.query(Datum.date).filter( Datum.mpv_position == 1).order_by(Datum.date.desc()).first()[0]) try: newest_gc_data_point = (gc_session.query(GcRun.date).filter( GcRun.median != None).filter(GcRun.standard_rsd < .02).filter( GcRun.rsd < .02).order_by(GcRun.date.desc()).first()[0]) except TypeError: logger.error( 'NoneType not subscriptable encountered due to lack of methane data to query.' ) from summit_errors import send_processor_warning send_processor_warning( PROC, 'Dual Plotter', '''The Methane Dual Plotter could not query any GcRuns for methane data.\n Check the database to make sure there are in fact GcRuns with medians and valid rsds. 
\nThis often happens when the methane database is remade without re-setting the filesize and pa_startline
                in the config table of the Core database, thus no peaks are found.''')
            return False

        newest_data_point = max(newest_picarro_data_point, newest_gc_data_point)

        if newest_data_point <= twoplot_config.last_data_date:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            picarro_session.close()
            picarro_engine.dispose()
            return False

        date_limits, major_ticks, minor_ticks = create_daily_ticks(twoplot_config.days_to_plot)

        if newest_data_point > twoplot_config.last_data_date:
            runs_with_medians = (gc_session.query(GcRun)
                                 .filter(GcRun.median != None)
                                 .filter(GcRun.standard_rsd < .02)
                                 .filter(GcRun.rsd < .02)
                                 .order_by(GcRun.date)
                                 .all())

            gc_dates = [run.date for run in runs_with_medians]
            gc_ch4 = [run.median for run in runs_with_medians]

            picarro_data = (picarro_session.query(Datum.date, Datum.ch4)
                            .filter((Datum.mpv_position == 0) | (Datum.mpv_position == 1))
                            .filter((Datum.instrument_status == 963), (Datum.alarm_status == 0))
                            .filter(Datum.date >= date_limits['left'])
                            .all())  # grab only data that falls in plotting period

            picarro_dates = [p.date for p in picarro_data]
            picarro_ch4 = [p.ch4 for p in picarro_data]

            with TempDir(methane_dir / 'plots'):
                name = summit_methane_plot(None,
                                           {'Summit Methane [Picarro]': [picarro_dates, picarro_ch4],
                                            'Summit Methane [GC]': [gc_dates, gc_ch4]},
                                           title='Summit Methane [Picarro & GC]',
                                           limits={'bottom': 1850, 'top': 2050,
                                                   'right': date_limits.get('right', None),
                                                   'left': date_limits.get('left', None)},
                                           major_ticks=major_ticks,
                                           minor_ticks=minor_ticks)

                methane_plot = Plot(methane_dir / 'plots' / name, remotedir, True)  # stage plots to be uploaded
                add_or_ignore_plot(methane_plot, core_session)

                twoplot_config.last_data_date = newest_data_point
                core_session.merge(twoplot_config)

            logger.info('New data plots created.')
        else:
            logger.info('No new data found to be plotted.')

        gc_session.close()
        gc_engine.dispose()
        picarro_session.close()
        picarro_engine.dispose()

        core_session.commit()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in dual_plot_methane()')
        send_processor_email(PROC, exception=e)
        core_session.close()
        core_engine.dispose()
        gc_session.close()
        gc_engine.dispose()
        picarro_session.close()
        picarro_engine.dispose()
        return False
async def check_load_dailies(logger): """ TODO: :param logger: logger, to log events to :return: Boolean, True if it ran without error and created data, False if not """ try: from summit_core import connect_to_db, get_all_data_files, core_dir, daily_logs_path, search_for_attr_value except ImportError as e: logger.error(f'ImportError occurred in check_load_dailies()') send_processor_email(PROC, exception=e) return False try: engine, session = connect_to_db('sqlite:///summit_daily.sqlite', core_dir) Base.metadata.create_all(engine) except Exception as e: logger.error( f'Error {e.args} prevented connecting to the database in check_load_dailies()' ) send_processor_email(PROC, exception=e) return False try: logger.info('Running check_load_dailies()') daily_files_in_db = session.query(DailyFile).all() daily_files = [ DailyFile(path) for path in get_all_data_files(daily_logs_path, '.txt') ] new_files = [] for file in daily_files: file_in_db = search_for_attr_value(daily_files_in_db, 'path', file.path) if not file_in_db: new_files.append(file) logger.info(f'File {file.name} added for processing.') else: if file.size > file_in_db.size: logger.info( f'File {file_in_db.name} added to process additional data.' ) new_files.append(file_in_db) if new_files: for file in new_files: dailies = read_daily_file(file.path) file_daily_dates = [d.date for d in file.entries] file.entries.extend( [d for d in dailies if d.date not in file_daily_dates]) file.size = file.path.stat().st_size session.merge(file) session.commit() session.close() engine.dispose() return True except Exception as e: logger.error(f'Exception {e.args} occurred in check_load_dailies()') send_processor_email(PROC, exception=e) session.close() engine.dispose() return False
async def find_cal_events(logger):
    """
    Searches the existing data for unused calibration data and creates/commits CalEvents if possible.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """
    logger.info('Running find_cal_events()')

    try:
        from summit_core import connect_to_db
        from summit_core import picarro_dir as rundir
        from summit_picarro import Base, Datum, CalEvent, mpv_converter, find_cal_indices
        from summit_picarro import log_event_quantification, filter_postcal_data
    except Exception as e:
        logger.error('ImportError occurred in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        standard_data = {}
        for MPV in [2, 3, 4]:
            mpv_data = pd.DataFrame(session.query(Datum.id, Datum.date)
                                    .filter(Datum.mpv_position == MPV)
                                    .filter(Datum.cal_id == None)
                                    .all())
            # get only data for this switching valve position, and not already in any calibration event

            if not len(mpv_data):
                logger.info(f'No new calibration events found for standard {mpv_converter[MPV]}')
                continue

            mpv_data['date'] = pd.to_datetime(mpv_data['date'])
            # use mpv_converter dict to get standard information
            standard_data[mpv_converter[MPV]] = mpv_data.sort_values(by=['date']).reset_index(drop=True)

        for standard, data in standard_data.items():
            indices = find_cal_indices(data['date'])

            cal_events = []

            if not len(indices) and len(data):
                # if there are no provided indices, but there's still calibration data, create the one event
                event_data = session.query(Datum).filter(Datum.id.in_(data['id'])).all()
                cal_events.append(CalEvent(event_data, standard))
            elif not len(indices):  # if there are no provided indices
                logger.info(f'No new cal events were found for {standard} standard.')
                continue

            prev_ind = 0
            for num, ind in enumerate(indices):
                # get all data within this event
                event_data = session.query(Datum).filter(Datum.id.in_(data['id'].iloc[prev_ind:ind])).all()
                cal_events.append(CalEvent(event_data, standard))

                if num == (len(indices) - 1):
                    # if it's the last index, get all ahead of it as the last event
                    event_data = session.query(Datum).filter(Datum.id.in_(data['id'].iloc[ind:])).all()
                    cal_events.append(CalEvent(event_data, standard))

                prev_ind = ind

            for ev in cal_events:
                filter_postcal_data(ev, session)
                # flag the following minute as questionable data (inst_status = 999)

                if ev.date - ev.dates[0] < dt.timedelta(seconds=90):
                    logger.info(f'CalEvent for date {ev.date} had a duration < 90s and was ignored.')
                    ev.standard_used = 'dump'
                    # give not-long-enough events standard type 'dump' so they're ignored
                    session.merge(ev)
                else:
                    for cpd in ['co', 'co2', 'ch4']:
                        ev.calc_result(cpd, 21)  # calculate results for all compounds going 21s back

                    session.merge(ev)
                    logger.info(f'CalEvent for date {ev.date} added.')
                    log_event_quantification(logger, ev)  # show quantification info as DEBUG in log

        session.commit()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False
async def check_load_new_data(logger):
    """
    Checks for new files, checks length of old ones for updates, and processes/commits new data to the database.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """
    logger.info('Running check_load_new_data()')

    try:
        from summit_core import picarro_logs_path as data_path
        from summit_core import picarro_dir as rundir
        from summit_core import connect_to_db, get_all_data_files, check_filesize
        from summit_picarro import Base, DataFile, Datum
        from sqlalchemy.orm.exc import MultipleResultsFound
        from summit_errors import EmailTemplate, sender, processor_email_list
        from pandas.errors import EmptyDataError, ParserError
    except ImportError as e:
        logger.error('ImportError occurred in check_load_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} caused database connection to fail in check_load_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        db_files = session.query(DataFile)
        db_filenames = [d.name for d in db_files.all()]

        all_available_files = get_all_data_files(data_path, '.dat')

        files_to_process = session.query(DataFile).filter(DataFile.processed == False).all()

        for file in all_available_files:
            try:
                db_match = db_files.filter(DataFile._name == file.name).one_or_none()
            except MultipleResultsFound:
                logger.warning(f'Multiple results found for file {file.name}. The first was used.')
                db_match = db_files.filter(DataFile._name == file.name).first()

            if file.name not in db_filenames:
                files_to_process.append(DataFile(file))
            elif check_filesize(file) > db_match.size:
                # if a matching file was found and it's now bigger, append for processing
                logger.info(f'File {file.name} had more data and was added for processing.')
                files_to_process.append(db_match)

        if not files_to_process:
            logger.warning('No new data was found.')
            return False

        for ind, file in enumerate(files_to_process):
            files_to_process[ind] = session.merge(file)
            # merge files and return the merged object to overwrite the old
            logger.info(f'File {file.name} added for processing.')
        session.commit()

        for file in files_to_process:
            try:
                df = pd.read_csv(file.path, delim_whitespace=True)
            except EmptyDataError as e:
                logger.error(f'Exception {e.args} occurred while reading {file.name}')
                send_processor_email(PROC, exception=e)
                continue
            except ParserError as e:
                logger.error(f'Pandas ParserError occurred while reading {file.name}.')
                from summit_errors import send_processor_warning

                try:
                    df = pd.read_csv(file.path, delim_whitespace=True, error_bad_lines=False)
                    send_processor_warning(PROC, 'Dataframe',
                                           (f'The Picarro Processor failed to read file {file.name} '
                                            + 'It was re-parsed, skipping unreadable lines, but should be'
                                            + ' investigated.'))

                except Exception as e:
                    logger.error(f'Exception {e.args} occurred in check_load_new_data() while reading a file.'
                                 + f' The file was {file.name}')
                    send_processor_email(PROC, exception=e)
                    continue
            except Exception as e:
                logger.error(f'Exception {e.args} occurred in check_load_new_data() while reading a file.'
                             + f' The file was {file.name}')
                send_processor_email(PROC, exception=e)
                continue

            original_length = len(df)

            df.dropna(axis=0, how='any', inplace=True)

            new_length = len(df)
            diff = original_length - new_length

            if diff:
                logger.warning(f'Dataframe contained {diff} null values in {file.name}.')
                from summit_errors import send_processor_warning

                send_processor_warning(PROC, 'DataFrame',
                                       (f'The Picarro Processor cut {diff} lines from a dataframe after reading it.\n'
                                        + f'{file.name} should be investigated and cleaned if necessary.'))

            # CO2 stays in ppm
            df['CO_sync'] *= 1000  # convert CO to ppb
            df['CH4_sync'] *= 1000  # convert CH4 to ppb
            df['CH4_dry_sync'] *= 1000

            df_list = df.to_dict('records')  # convert to list of dicts

            data_list = []
            for line in df_list:
                data_list.append(Datum(line))

            if data_list:
                data_dates = [d.date for d in data_list]
                dates_already_in_db = session.query(Datum.date).filter(Datum.date.in_(data_dates)).all()
                dates_already_in_db[:] = [d.date for d in dates_already_in_db]

                for d in data_list:
                    if d.date not in dates_already_in_db:
                        d.file_id = file.id  # relate Datum to the file it originated in
                        session.add(d)
            else:
                logger.info(f'No new data created from file {file.name}.')

            file.processed = True
            file.size = check_filesize(file.path)
            logger.info(f'All data in file {file.name} processed.')
            session.commit()

        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_load_new_data().')
        send_processor_email(PROC, exception=e)
        return False
async def check_load_pa_log(logger):
    """
    Read the PA log and create new PaLine objects if possible.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    logger.info('Running check_load_pa_log()')

    try:
        from summit_core import methane_LOG_path as pa_filepath
        from summit_core import connect_to_db, check_filesize, core_dir, Config, split_into_sets_of_n
        from summit_methane import Base, read_pa_line, PaLine
        from summit_core import methane_dir as rundir
        from pathlib import Path
    except ImportError as e:
        logger.error('ImportError occurred in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        ch4_config = core_session.query(Config).filter(Config.processor == PROC).one_or_none()

        if not ch4_config:
            ch4_config = Config(processor=PROC)  # use all default values except processor on init
            core_session.add(ch4_config)
            core_session.commit()

    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the core database in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        if check_filesize(pa_filepath) <= ch4_config.filesize:
            logger.info('PA file did not change size.')
            return False

        ch4_config.filesize = check_filesize(pa_filepath)
        core_session.merge(ch4_config)
        core_session.commit()

        line_to_start = ch4_config.pa_startline - 3  # pad start to avoid missing samples
        if line_to_start < 0:
            line_to_start = 0

        pa_file_contents = pa_filepath.read_text().split('\n')[line_to_start:]

        ch4_config.pa_startline = ch4_config.pa_startline + len(pa_file_contents) - 1

        pa_file_contents[:] = [line for line in pa_file_contents if line]

        pa_lines = []
        for line in pa_file_contents:
            pa_lines.append(read_pa_line(line))

        if not pa_lines:
            logger.info('No new PaLines found.')
            return False

        else:
            ct = 0  # count committed logs
            all_line_dates = [line.date for line in pa_lines]

            # SQLite can't take in clauses with > 1000 variables, so chunk to sets of 500
            if len(all_line_dates) > 500:
                sets = split_into_sets_of_n(all_line_dates, 500)
            else:
                sets = [all_line_dates]
                # TODO: Can be reduced to just splitting, this step is done automatically by split_into.

            dates_already_in_db = []
            for set in sets:
                set_matches = session.query(PaLine.date).filter(PaLine.date.in_(set)).all()
                set_matches[:] = [s.date for s in set_matches]
                dates_already_in_db.extend(set_matches)

            for line in pa_lines:
                if line.date not in dates_already_in_db:
                    session.add(line)
                    logger.info(f'PaLine for {line.date} added.')
                    ct += 1

            if ct == 0:
                logger.info('No new PaLines found.')
            else:
                logger.info(f'{ct} PaLines added.')

        session.commit()

        core_session.merge(ch4_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False
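# The real split_into_sets_of_n() lives in summit_core; the function below is only a sketch of the
# behavior assumed in check_load_pa_log() (chunk a list so no single SQLite IN clause exceeds the
# ~1000-variable limit). The name is hypothetical and not the library implementation.
def split_into_sets_of_n_sketch(items, n):
    """Return successive lists of at most n items from the given sequence."""
    return [items[i:i + n] for i in range(0, len(items), n)]


# Example use, mirroring the chunked date query above:
# for chunk in split_into_sets_of_n_sketch(all_line_dates, 500):
#     matches = session.query(PaLine.date).filter(PaLine.date.in_(chunk)).all()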
def main(): """ This function creates calibration events with a valve position of 5, and places them in the Picarro Database for future analysis. :param N/A -- Perhaps logger once incorperated into full code :return: boolean, did it run? !! Note: To view the plots, comment out line 36 and set a breakpoint directly after plt.show() on line 131 """ # Import Required Functions from summit_core import connect_to_db from summit_picarro import Base, Datum, CalEvent, find_cal_indices from summit_picarro import filter_postcal_data from matplotlib.pyplot import figure # Connect to the database rundir = r'C:\Users\ARL\Desktop' # location of DB engine, session = connect_to_db('sqlite:///JASHAN_summit_picarro.sqlite', rundir) # Create eng & sess Base.metadata.create_all(engine) # Create base # Get any data with a valve position of 5 standard_data = {} MPV = 5 mpv_data = pd.DataFrame( session.query(Datum.id, Datum.date) # Gets the datum ID & Date .filter(Datum.mpv_position == MPV) # Filters them for valve pos #5 .filter(Datum.cal_id == None) # only if not already any cal event .all()) # actually gathers the data mpv_data['date'] = pd.to_datetime( mpv_data['date']) # Convert to PD datetime version mpv_converter = { 5: 'ch4_GC_std' } # TODO: Incorporate in larger project code standard_data[mpv_converter[MPV]] = mpv_data.sort_values( by=['date']).reset_index(drop=True) # Create a calc_event with the given name of this standard for standard, data in standard_data.items(): indices = find_cal_indices( data['date']) # Gathers the indicies of each new cal event cal_events = [] # preallocation of cal events prev_ind = 0 # prev_ind is initially the first index # If indicies is empty, but there is still data, create a single event if not len(indices) and len(data): event_data = (session.query(Datum).filter(Datum.id.in_( data['id'])).all()) cal_events.append(CalEvent(event_data, standard)) # Seperate cal events from gathered indicies and place in cal_events for num, ind in enumerate(indices): event_data = ( session.query(Datum) # Searches through all Datums .filter(Datum.id.in_(data['id'].iloc[prev_ind:ind]) ) # Data bewteen index and previous .all()) # actually gathers the data cal_events.append(CalEvent(event_data, standard)) # appends cal events list if num == (len(indices) - 1): # last index, gets the rest event_data = ( session.query(Datum).filter( Datum.id.in_( data['id'].iloc[ind:])) # index to end of list .all()) cal_events.append(CalEvent( event_data, standard)) # appends cal events list # Create a plot of this cal event coPlot, co2Plot, ch4Plot, datePlot = [], [], [], [] for x in event_data: coPlot.append(x.co) co2Plot.append(x.co2) ch4Plot.append(x.ch4) # Raw Numbers if datePlot == []: timestep = x.date.timestamp() datePlot.append(timestep) else: timestep = x.date.timestamp() - datePlot[ 0] # likely a better way datePlot.append(timestep) # to do this ev = cal_events[len(indices)] for cpd in ['co', 'co2', 'ch4']: time = (ev.date - ev.dates[0]).seconds ev.calc_result(cpd, time) datePlot[0] = 0 # start it at 0 figure(1) table_vals = [[list(ev.co_result.values())[0]], [list(ev.co_result.values())[1]], [list(ev.co_result.values())[2]]] the_table = plt.table(cellText=table_vals, cellColours=None, cellLoc='right', colWidths=[0.3] * 3, rowLabels=['mean', 'median', 'stdev'], rowColours=None, rowLoc='left', colLabels=['value'], colColours=None, colLoc='center', loc='lower right', bbox=None) plt.plot(datePlot, coPlot, label='co') plt.xlabel('Time since start of cal_event [seconds]') plt.ylabel('Compounds') 
plt.title('CO') figure(2) table_vals = [[list(ev.co2_result.values())[0]], [list(ev.co2_result.values())[1]], [list(ev.co2_result.values())[2]]] the_table = plt.table(cellText=table_vals, cellColours=None, cellLoc='right', colWidths=[0.3] * 3, rowLabels=['mean', 'median', 'stdev'], rowColours=None, rowLoc='left', colLabels=['value'], colColours=None, colLoc='center', loc='upper right', bbox=None) plt.plot(datePlot, co2Plot, label='co2') plt.xlabel('Time since start of cal_event [seconds]') plt.ylabel('Compounds') plt.title('CO2') figure(3) table_vals = [[list(ev.ch4_result.values())[0]], [list(ev.ch4_result.values())[1]], [list(ev.ch4_result.values())[2]]] the_table = plt.table(cellText=table_vals, cellColours=None, cellLoc='right', colWidths=[0.3] * 3, rowLabels=['mean', 'median', 'stdev'], rowColours=None, rowLoc='left', colLabels=['value'], colColours=None, colLoc='center', loc='upper right', bbox=None) plt.plot(datePlot, ch4Plot, label='ch4') plt.xlabel('Time since start of cal_event [seconds]') plt.ylabel('Compounds') plt.title('ch4') plt.show() prev_ind = ind # set previous index as current # Calculate the CO, CO2, and Methane results with Brendan's functions for ev in cal_events: filter_postcal_data( ev, session) # filter following min of ambient data if ev.date - ev.dates[0] < dt.timedelta( seconds=90): # events under 90 seconds are dumped ev.standard_used = 'dump' # assign dump name session.merge(ev) # merge results with session # otherwise, iterate over each compound and calculate results else: for cpd in ['co', 'co2', 'ch4']: time = 21 ev.calc_result( cpd, time) # results are calced (time) seconds back session.merge(ev) # merge results with session # Save to your local copy of the database & check results session.commit() # commit results to session # Create a timeseries of the results to ascertain what portion of the data we want to keep # Integrate with Brendan's code once tested for errors return True
async def check_load_run_logs(logger):
    """
    Read new log files and create new GcRun and Sample objects if possible.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    try:
        from summit_core import methane_logs_path
        from summit_core import methane_dir as rundir
        from summit_core import get_all_data_files, connect_to_db
        from summit_methane import Base, GcRun, Sample, read_log_file
    except ImportError as e:
        logger.error('ImportError occurred in check_load_run_logs()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in check_load_run_logs()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running check_load_run_logs()')
        runs_in_db = session.query(GcRun).all()
        samples = session.query(Sample)
        sample_count = samples.count()

        run_dates = [r.date for r in runs_in_db]

        files = get_all_data_files(methane_logs_path, '.txt')

        runs = []
        for file in files:
            runs.append(read_log_file(file))

        new_run_count = 0  # count runs added
        for run in runs:
            if run.date not in run_dates:
                session.add(run)
                logger.info(f'GcRun for {run.date} added.')
                new_run_count += 1

        if not new_run_count:
            logger.info('No new GcRuns added.')
        else:
            session.commit()
            new_sample_count = session.query(Sample).count() - sample_count
            logger.info(f'{new_run_count} GcRuns added, containing {new_sample_count} Samples.')

            if new_run_count * 10 != new_sample_count:
                logger.warning('There were not ten Samples per GcRun as expected.')

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        logger.error(f'Exception {e.args} occurred in check_load_run_logs()')
        send_processor_email(PROC, exception=e)
        return False
async def read_excel_sheet(logger):
    """
    Stub: will eventually read the manually-integrated methane sheet back into the database.
    Currently it only ensures a Config row exists for the sheet reader.

    :param logger: logging logger for info and failures
    :return: bool, True if ran, False if errored
    """
    logger.info('Running read_excel_sheet()')

    try:
        import pandas as pd
        from datetime import datetime

        from summit_core import methane_dir as rundir
        from summit_errors import send_processor_warning
        from summit_methane import GcRun, Base, add_formulas_and_format_sheet
        from summit_core import Config, connect_to_db, append_df_to_excel
        from summit_core import methane_dir, core_dir, data_file_paths

        methane_sheet = data_file_paths.get('methane_sheet', None)

        if not methane_sheet:
            logger.error('Filepath for the methane integration sheet could not be retrieved.')
            send_processor_warning(PROC, 'Filepath Error',
                                   '''The methane integration sheet filepath could not be retrieved.
                                   It should be listed as "methane_sheet" in file_locations.json in the core folder.''')
            return False

    except ImportError as e:
        logger.error('ImportError occurred in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        methane_sheet_read_config = (core_session.query(Config)
                                     .filter(Config.processor == 'methane_sheet_read')
                                     .one_or_none())

        if not methane_sheet_read_config:
            methane_sheet_read_config = Config(processor='methane_sheet_read')
            # use all default values except processor on init
            core_session.add(methane_sheet_read_config)
            core_session.commit()

    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the core database in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_session.merge(methane_sheet_read_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False
statements for the changing parameter bounds based on the state of the H2O Trap and the Absorbent Trap.
However, the code runs slower, and currently every single log in the database has the default state for the traps,
so I've decided to keep this one as an alternate and let you choose if there's a better way to implement this
feature without losing performance.
"""

import datetime as dt
from pathlib import Path
from datetime import datetime

import summit_core
from summit_core import connect_to_db, TempDir, Config
from summit_core import voc_dir, core_dir
from summit_voc import LogFile
import pandas as pd

engine, session = connect_to_db('sqlite:///Jsummit_voc.sqlite', voc_dir)
core_engine, core_session = connect_to_db('sqlite:///Jsummit_core.sqlite', core_dir)
Config.__table__.create(core_engine, checkfirst=True)

logcheck_config = core_session.query(Config).filter(Config.processor == 'Log Checking').one_or_none()

if not logcheck_config:
    logcheck_config = Config(processor='Log Checking', days_to_plot=21)
    # use all default values except processor on init
    core_session.add(logcheck_config)
    core_session.commit()
def retrieve_new_files(logger): from summit_core import connect_to_db, list_files_recur, split_into_sets_of_n logger.info('Running retrieve_new_files()') con = connect_to_sftp() engine, session = connect_to_db('sqlite:///zugspitze.sqlite', CORE_DIR) for path in ['folder1', 'folder2', 'folder3']: logger.info(f'Processing {path} files.') local_path = CORE_DIR / path remote_path = REMOTE_BASE_PATH + f'/{path}' all_remote_files = list_remote_files_recur( con, remote_path) # get a list of all SFTPAttributes + paths all_local_files = [str(p) for p in list_files_recur(local_path) ] # get all local file paths new_remote_files = [] for remote_file in all_remote_files: new_remote_files.append( RemoteFile(remote_file.st_mtime, remote_file.path)) # create DB objects for all remote paths new_local_files = [] for remote_file in all_local_files: new_local_files.append( LocalFile(os.stat(remote_file).st_mtime, remote_file)) # create DB objects for all local paths remote_sets = split_into_sets_of_n( [r.path for r in new_remote_files], 750) # don't exceed 1K sqlite var limit local_sets = split_into_sets_of_n([l.path for l in new_local_files], 750) # loop through remote, then local filesets to check against DB and commit any new ones for Filetype, filesets, new_files in zip( [RemoteFile, LocalFile], [remote_sets, local_sets], [new_remote_files, new_local_files]): paths_in_db = [] for set_ in filesets: in_db = session.query(Filetype.path).filter( Filetype.path.in_(set_)).all() if in_db: paths_in_db.extend(in_db) for file in new_files: if file.path in paths_in_db: file_in_db = session.query(Filetype).filter( Filetype.path == file.path).one_or_none() if file.st_mtime > file_in_db.st_mtime: file_in_db.st_mtime = file.st_mtime session.merge(file_in_db) else: session.add(file) session.commit() # commit at the end of each filetype # local and remote files are now completely up-to-date in the database files_to_retrieve = [] remote_files = session.query(RemoteFile).order_by( RemoteFile.relpath).all() local_files = session.query(LocalFile).order_by( LocalFile.relpath).all() for remote_file in remote_files: if remote_file.local is None: local_match = search_for_attr_value(local_files, 'relpath', remote_file.relpath) if local_match: remote_file.local = local_match if remote_file.st_mtime > local_match.st_mtime: files_to_retrieve.append( remote_file ) # add the remote file to download if st_mtime is greater else: files_to_retrieve.append( remote_file ) # add the remote file if there's no local copy (create later) else: if remote_file.st_mtime > remote_file.local.st_mtime: files_to_retrieve.append(remote_file) logger.info(f'Remote files: {len(remote_files)}') logger.info(f'Local files: {len(local_files)}') logger.info( f'{len(files_to_retrieve)} file need updating or retrieval.') ct = 0 for remote_file in files_to_retrieve: if remote_file.local is not None: con.get(remote_file.path, remote_file.local.path ) # get remote file and put in the local's path remote_file.local.st_mtime = remote_file.st_mtime # update, then merge session.merge(remote_file) logger.info(f'Remote file {remote_file.relpath} was updated.') ct += 1 else: new_local_path = CORE_DIR / remote_file.relpath.lstrip('/') scan_and_create_dir_tree( new_local_path ) # scan the path and create any needed folders new_local_path = str(new_local_path) # revert to string con.get( remote_file.path, new_local_path) # get file and put in it's relative place new_local = LocalFile(remote_file.st_mtime, new_local_path) new_local.remote = remote_file session.add( new_local ) 
# create, relate, and add the local file that was transferred session.merge(remote_file) logger.info( f'Remote file {remote_file.relpath} was retrieved and added to local database.' ) ct += 1 if ct % 100 == 0: session.commit() # routinely commit files in batches of 100 session.commit() session.close() engine.dispose()
async def load_excel_corrections(logger):

    try:
        import pandas as pd
        from pathlib import Path
        from summit_voc import Peak, LogFile, NmhcLine, NmhcCorrection, GcRun
        from summit_core import connect_to_db, search_for_attr_value
        from summit_core import voc_dir as rundir
    except ImportError as e:
        logger.error('ImportError occurred in load_excel_corrections()')
        return False

    data = pd.read_excel('Ambient_2019.xlsx', header=None, usecols=check_cols).dropna(axis=1, how='all')

    data = data.set_index([0])  # set first column of df to the index
    data.index = data.index.str.lower()
    data = data[data.columns[:-1]]  # drop last column of DF (the one with 'END' in it)

    engine, session = connect_to_db('sqlite:///summit_voc.sqlite', rundir)

    logfiles = session.query(LogFile).order_by(LogFile.samplecode)
    nmhc_lines = session.query(NmhcLine).filter(NmhcLine.correction_id == None).order_by(NmhcLine.id)
    gc_runs = session.query(GcRun).order_by(GcRun.id)

    nmhc_corrections = []

    corrections_in_db = session.query(NmhcCorrection).all()
    correction_dates_in_db = [c.date for c in corrections_in_db]

    with session.no_autoflush:
        for col_name in data.columns.tolist():
            col = data.loc[:, col_name]
            nmhc_corrections.append(correction_from_df_column(col, logfiles, nmhc_lines, gc_runs, logger))

        for correction in nmhc_corrections:
            if correction:
                if correction.date not in correction_dates_in_db:
                    session.add(correction)
                    logger.info(f'Correction for {correction.date} added.')

        session.commit()

    nmhc_corrections = session.query(NmhcCorrection).filter(NmhcCorrection.status == 'unapplied').all()
    # re-get all added corrections that haven't been applied

    for correction in nmhc_corrections:
        if correction:
            line = session.query(NmhcLine).filter(NmhcLine.correction_id == correction.id).one_or_none()

            if not line:
                logger.info(f'A matching line for NmhcCorrection {correction} was not found.')
                continue
        else:
            continue

        for peak_corr in correction.peaklist:
            peak_by_name = search_for_attr_value(line.peaklist, 'name', peak_corr.name)
            peak_by_rt = search_for_attr_value(line.peaklist, 'rt', peak_corr.rt)

            if (peak_by_name and peak_by_rt) and (peak_by_name is peak_by_rt):
                # if they're not None, and identical
                peak = peak_by_name

            else:
                if peak_by_name and peak_by_rt:
                    # if both exist, but not identical, prefer the RT-found one
                    peak_by_name.name = '-'
                    peak_by_rt.name = peak_corr.name
                    peak = peak_by_rt
                    session.merge(peak)
                    session.merge(peak_by_name)

                elif peak_by_name:
                    peak = peak_by_name
                    session.merge(peak)

                elif peak_by_rt:
                    peak = peak_by_rt
                    peak.name = peak_corr.name
                    session.merge(peak)

                else:
                    logger.warning(f"Peak with name {peak_corr.name} or retention time of {peak_corr.rt} from "
                                   + f"NmhcCorrection {correction.date} not found in NmhcLine for {line.date}")
                    continue

            if peak.pa != peak_corr.pa:
                peak.pa = peak_corr.pa
                peak.rt = peak_corr.rt
                peak.rev = peak.rev + 1  # Sqlite *does not* like using += notation

        correction.status = 'applied'

        line.nmhc_corr_con = correction
        correction.correction_id = line

        session.merge(correction)
        session.merge(line)
        logger.info(f'Successful peak corrections made to {line.date}')

    session.commit()
def brendan_test():
    from datetime import datetime

    import pandas as pd

    from summit_core import connect_to_db, merge_lists, search_for_attr_value
    from summit_core import methane_dir
    from summit_methane import SampleCorrection, Base, GcRun

    filename = r'Z:\Data\Summit_GC\Summit_GC_2019\CH4_results\Methane_Automated_2019.xlsx'
    # filename = r'/home/brendan/PycharmProjects/Summit/processors/summit_methane_processor/SUM_CH4_insitu_2019.xlsx'  # Brendan's path

    year = filename.split('.')[-2][-4:]  # risky...

    engine, session = connect_to_db('sqlite:///summit_methane_tester.sqlite', methane_dir)
    Base.metadata.create_all(engine)

    data = pd.read_excel(filename, sheet_name='Sheet1')

    indices = data['date'].dropna(how='all').index.tolist()

    for ind in indices:
        if ind % 5 != 0:
            date = data.loc[ind, 'date']
            filename = data.loc[ind, 'filename']
            print(f'File {filename} for run {date} did not have the proper number of lines to analyze.')
            # can't happen

    indices = [i for i in indices if i % 5 == 0]  # remove any failed after warning above

    gc_runs = session.query(GcRun)

    ct = 0
    for ind in indices:
        run_date = data.loc[ind, 'date'].to_pydatetime()

        matched_run = gc_runs.filter(GcRun.date == run_date).one_or_none()

        if not matched_run:
            print(f'No run matched for {run_date}.')
            continue  # for now...

        run_set = data.loc[ind:ind + 6, ['peak1', 'peak2']].dropna(axis=1, how='all')

        if not run_set.columns.tolist():
            # if the subset of peak1 and peak2 is empty after dropping any where all = na
            # print('WARNING - LOG ME')
            continue

        peaks1 = run_set['peak1'].values.tolist()  # column of peaks, ordered 1,3,5,7,9
        peaks2 = run_set['peak2'].values.tolist()  # column of peaks, ordered 2,4,6,8,10

        ordered_peaks = merge_lists(peaks1, peaks2)  # returns peaks in order [0,1,2,3,4, ..., 9]

        corrections = []

        for num, pa in enumerate(ordered_peaks):
            """
            Finding samples in db:
            Use DOY, hour to find the run, then use run.id to get samples iteratively;
            if sample of x num does not exist, warn/log an error (should have been created when reading log)
            """
            matched_sample = search_for_attr_value(matched_run.samples, 'sample_num', num)

            if not matched_sample:
                print(f'Matched sample not found for sample number {num} in GcRun for {matched_run.date}.')
                continue

            corrections.append(SampleCorrection(num, pa, matched_sample))

        # for sample in corrections:
        #     print(sample)
        #     print(sample.sample_num)
        #     print(sample.pa)

        for corr in corrections:
            # TODO: Check for already present in DB
            session.merge(corr)
            ct += 1

        if ct > 50:
            continue

    # get number of data points for each day/hour combo
    # print(counts.where(counts != 5).dropna(how='all', axis='rows'))
    # warn these exist, they can't be safely interpreted

    session.commit()
    session.close()
    engine.dispose()
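# merge_lists() is imported from summit_core above; the sketch below only illustrates the interleaving
# behavior assumed by brendan_test() (peak1 holds the odd-positioned samples, peak2 the even-positioned
# ones, and the merged list comes back in run order 0..9). The name is hypothetical, not the library code.
def merge_lists_sketch(firsts, seconds):
    """Interleave two equal-length lists: [f0, s0, f1, s1, ...]."""
    merged = []
    for first_item, second_item in zip(firsts, seconds):
        merged.extend([first_item, second_item])
    return merged


# merge_lists_sketch([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]) -> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]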
async def match_peaks_to_samples(logger):
    """
    All detected peaks in a run are attached to PaLines, but are not linked to Samples until they've passed
    certain criteria.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, split_into_sets_of_n
        from summit_methane import Peak, Sample, GcRun, Base, sample_rts
        from operator import attrgetter
        import datetime as dt
    except ImportError as e:
        logger.error('ImportError occurred in match_peaks_to_samples()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in match_peaks_to_samples()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running match_peaks_to_samples()')

        unmatched_samples = session.query(Sample).filter(Sample.peak_id == None, Sample.run_id != None).all()

        whole_set = list({s.run_id for s in unmatched_samples})

        # SQLite can't take in clauses with > 1000 variables, so chunk to sets of 500
        if len(whole_set) > 500:  # subdivide set
            sets = split_into_sets_of_n(whole_set, 500)
        else:
            sets = [whole_set]
            # TODO: Can be reduced to just splitting, this step is done automatically by split_into.

        runs_w_unmatched_samples = []
        for set in sets:
            runs_w_unmatched_samples.extend((session.query(GcRun).filter(GcRun.id.in_(set)).all()))
            # create set of runs that require processing

        for run in runs_w_unmatched_samples:
            # loop through runs containing samples that haven't been matched with peaks
            samples = session.query(Sample).filter(Sample.run_id == run.id).all()
            peaks = session.query(Peak).filter(Peak.pa_line_id == run.pa_line_id)

            for sample in samples:
                sn = sample.sample_num
                potential_peaks = peaks.filter(Peak.rt.between(sample_rts[sn][0], sample_rts[sn][1])).all()
                # filter for peaks in this gc run between the expected retention times given in sample_rts

                if len(potential_peaks):
                    # currently, the criteria for "this is the real peak" is "this is the biggest peak"
                    peak = max(potential_peaks, key=attrgetter('pa'))
                    if peak:
                        sample.peak = peak
                        peak.name = 'CH4_' + str(sample.sample_num)
                        sample.date = run.pa_line.date + dt.timedelta(minutes=peak.rt - 1)
                        session.merge(sample)

        session.commit()

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in match_peaks_to_samples()')
        send_processor_email(PROC, exception=e)
        return False
async def create_mastercals(logger):
    """
    Searches all un-committed CalEvents, looking for (high, middle, low) sets that can then have a curve and other
    stats calculated. It will report them as DEBUG items in the log.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """
    logger.info('Running create_mastercals()')

    try:
        from summit_core import picarro_dir as rundir
        from summit_core import connect_to_db
        from summit_picarro import MasterCal, CalEvent, match_cals_by_min
        import matplotlib.pyplot as plt
        import seaborn as sns
        import numpy as np
    except Exception as e:
        logger.error('ImportError occurred in create_mastercals()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite', rundir)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to database in create_mastercals()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        # Get cals by standard, but only if they're not in another MasterCal already
        lowcals = (session.query(CalEvent)
                   .filter(CalEvent.mastercal_id == None, CalEvent.standard_used == 'low_std')
                   .all())
        highcals = (session.query(CalEvent)
                    .filter(CalEvent.mastercal_id == None, CalEvent.standard_used == 'high_std')
                    .all())
        midcals = (session.query(CalEvent)
                   .filter(CalEvent.mastercal_id == None, CalEvent.standard_used == 'mid_std')
                   .all())

        mastercals = []
        for lowcal in lowcals:
            matching_high = match_cals_by_min(lowcal, highcals, minutes=5)

            if matching_high:
                matching_mid = match_cals_by_min(matching_high, midcals, minutes=5)

                if matching_mid:
                    mastercals.append(MasterCal([lowcal, matching_high, matching_mid]))

        if mastercals:
            for mc in mastercals:
                # calculate curve from low - high point, and check middle distance
                mc.create_curve()
                session.add(mc)
                logger.info(f'MasterCal for {mc.subcals[0].date} created.')

            session.commit()
            return True
        else:
            logger.info('No MasterCals were created.')
            return False

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in create_mastercals()')
        send_processor_email(PROC, exception=e)
        return False
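# match_cals_by_min() comes from summit_picarro; the sketch below only illustrates the matching behavior
# assumed in create_mastercals() (find a candidate CalEvent whose date falls within N minutes of the given
# event). The name and the first-match choice are assumptions, not the library implementation.
def match_cals_by_min_sketch(cal, candidates, minutes=5):
    """Return the first candidate whose date is within `minutes` of cal.date, else None."""
    import datetime as dt

    window = dt.timedelta(minutes=minutes)
    for candidate in candidates:
        if abs(candidate.date - cal.date) <= window:
            return candidate
    return None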
async def quantify_samples(logger):
    """
    On a per-run basis, use std1 to calc samples 1-5 (~3) and std2 to calculate samples 6-10 (~8).
    Output warnings if only one standard in a sample is valid.

    :param logger: logger, to log events to
    :return: Boolean, True if successful
    """
    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, search_for_attr_value
        from summit_methane import Standard, GcRun, Base
        from summit_methane import calc_ch4_mr, valid_sample
    except Exception as e:
        logger.error('ImportError occurred in quantify_samples()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in quantify_samples()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running quantify_samples()')

        unquantified_runs = session.query(GcRun).filter(GcRun.median == None).all()

        ct = 0
        for run in unquantified_runs:
            # TODO: Move the majority of this to class methods for GcRuns; will make editing integrations WAY easier
            samples = run.samples
            standard = (session.query(Standard)
                        .filter(run.date >= Standard.date_st, run.date < Standard.date_en)
                        .first())  # TODO; Set unique constraints on standards, revert to one_or_none()

            if standard is not None:
                ambients = [sample for sample in samples
                            if (sample.sample_type == 3 and valid_sample(sample))]

                standard1 = search_for_attr_value(samples, 'sample_num', 2)
                standard2 = search_for_attr_value(samples, 'sample_num', 7)

                if not ambients:
                    logger.warning(f'No ambient samples were quantifiable in GcRun for {run.date}')
                    continue

                if (not valid_sample(standard1)) and (not valid_sample(standard2)):
                    logger.warning(f'No valid standard samples found in GcRun for {run.date}.')
                    continue
                elif not valid_sample(standard1):
                    # use std2 for all ambient quantifications
                    logger.info(f'Only one standard used for samples in GcRun for {run.date}')
                    for amb in ambients:
                        amb = calc_ch4_mr(amb, standard2, standard)
                elif not valid_sample(standard2):
                    # use std1 for all ambient quantifications
                    logger.info(f'Only one standard used for samples in GcRun for {run.date}')
                    for amb in ambients:
                        amb = calc_ch4_mr(amb, standard1, standard)
                else:
                    # use std1 for ambients 0-4 and std2 for ambients 5-9
                    for amb in ambients:
                        if amb.sample_num < 5:
                            amb = calc_ch4_mr(amb, standard1, standard)
                        else:
                            amb = calc_ch4_mr(amb, standard2, standard)

                    run.standard_rsd = (s.stdev([standard1.peak.pa, standard2.peak.pa])
                                        / s.median([standard1.peak.pa, standard2.peak.pa]))

                from summit_methane import plottable_sample

                all_run_mrs = [amb.peak.mr for amb in ambients if plottable_sample(amb)]
                # do basic filtering for calculating run medians

                if all_run_mrs:
                    run.median = s.median(all_run_mrs)
                    if len(all_run_mrs) > 1:
                        run.rsd = s.stdev(all_run_mrs) / run.median

                session.merge(run)
                # merge only the run, it contains and cascades samples, palines and peaks that were changed
                ct += 1

            else:
                logger.warning(f'No standard value found for GcRun at {run.date}.')

        session.commit()

        if ct:
            logger.info(f'{ct} GcRuns were successfully quantified.')
            session.close()
            engine.dispose()
            return True
        else:
            logger.info('No GcRuns quantified.')
            session.close()
            engine.dispose()
            return False

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in quantify_samples()')
        send_processor_email(PROC, exception=e)
        return False
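# calc_ch4_mr() is the real quantification routine in summit_methane; the function below is only a sketch
# of one common single-point approach consistent with the bracketing-standard scheme described in the
# quantify_samples() docstring (sample peak area ratioed to the nearest standard's peak area, scaled by the
# standard's certified mixing ratio). The name, signature, and default value are assumptions.
def single_point_ch4_mr_sketch(sample_pa, standard_pa, standard_mr=2067.16):
    """Estimate a CH4 mixing ratio (ppb) from one sample peak area and one standard peak area."""
    if not standard_pa:
        return None  # cannot quantify against a missing or zero standard peak
    return (sample_pa / standard_pa) * standard_mr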
async def plot_new_data(logger): """ Checks data against the last plotting time, and creates new plots for CO, CO2, and CH4 if new data exists. :param logger: logging logger at module level :return: boolean, did it run/process new data? """ logger.info('Running plot_new_data()') try: from pathlib import Path from summit_core import picarro_dir as rundir from summit_core import create_daily_ticks, connect_to_db, TempDir, Plot, core_dir, Config, add_or_ignore_plot from summit_picarro import Base, Datum, summit_picarro_plot plotdir = rundir / 'plots' remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots' except Exception as e: logger.error('ImportError occurred in plot_new_data()') send_processor_email(PROC, exception=e) return False try: engine, session = connect_to_db('sqlite:///summit_picarro.sqlite', rundir) Base.metadata.create_all(engine) except Exception as e: logger.error(f'Exception {e.args} occurred in plot_new_data()') send_processor_email(PROC, exception=e) return False try: core_engine, core_session = connect_to_db( 'sqlite:///summit_core.sqlite', core_dir) Plot.__table__.create(core_engine, checkfirst=True) Config.__table__.create(core_engine, checkfirst=True) picarro_config = core_session.query(Config).filter( Config.processor == PROC).one_or_none() if not picarro_config: picarro_config = Config( processor=PROC ) # use all default values except processor on init core_session.add(picarro_config) core_session.commit() except Exception as e: logger.error( f'Error {e.args} prevented connecting to the core database in plot_new_data()' ) send_processor_email(PROC, exception=e) return False try: newest_data_point = (session.query(Datum.date).filter( Datum.mpv_position == 1).order_by(Datum.date.desc()).first()[0]) if newest_data_point <= picarro_config.last_data_date: logger.info('No new data was found to plot.') core_session.close() core_engine.dispose() session.close() engine.dispose() return False picarro_config.last_data_date = newest_data_point core_session.add(picarro_config) date_limits, major_ticks, minor_ticks = create_daily_ticks( picarro_config.days_to_plot) all_data = ( session.query(Datum.date, Datum.co, Datum.co2, Datum.ch4).filter(( Datum.mpv_position == 0) | (Datum.mpv_position == 1)).filter( (Datum.instrument_status == 963), (Datum.alarm_status == 0)).filter( Datum.date >= date_limits['left'] ) # grab only data that falls in plotting period .all()) if not all_data: logger.info('No new data was found to plot.') core_session.close() core_engine.dispose() session.close() engine.dispose() return False # get only ambient data dates = [] co = [] co2 = [] ch4 = [] for result in all_data: dates.append(result.date) co.append(result.co) co2.append(result.co2) ch4.append(result.ch4) with TempDir(plotdir): from summit_core import five_minute_medians dates_co, co = five_minute_medians(dates, co) name = summit_picarro_plot(None, ({ 'Summit CO': [dates_co, co] }), limits={ 'right': date_limits.get('right', None), 'left': date_limits.get('left', None), 'bottom': 60, 'top': 180 }, major_ticks=major_ticks, minor_ticks=minor_ticks) co_plot = Plot(plotdir / name, remotedir, True) # stage plots to be uploaded add_or_ignore_plot(co_plot, core_session) name = summit_picarro_plot(None, ({ 'Summit CO2': [dates, co2] }), limits={ 'right': date_limits.get('right', None), 'left': date_limits.get('left', None), 'bottom': 400, 'top': 420 }, major_ticks=major_ticks, minor_ticks=minor_ticks, unit_string='ppmv') co2_plot = Plot(plotdir / name, remotedir, True) # stage plots to be uploaded 
add_or_ignore_plot(co2_plot, core_session) name = summit_picarro_plot(None, ({ 'Summit Methane [Picarro]': [dates, ch4] }), limits={ 'right': date_limits.get('right', None), 'left': date_limits.get('left', None), 'bottom': 1850, 'top': 2050 }, major_ticks=major_ticks, minor_ticks=minor_ticks) ch4_plot = Plot(plotdir / name, remotedir, True) # stage plots to be uploaded add_or_ignore_plot(ch4_plot, core_session) logger.info('New data plots were created.') session.close() engine.dispose() core_session.commit() core_session.close() core_engine.dispose() return True except Exception as e: logger.error(f'Exception {e.args} occurred in plot_new_data()') send_processor_email(PROC, exception=e) session.close() engine.dispose() core_session.close() core_engine.dispose() return False
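# Illustrative sketch (not part of the processor code): five_minute_medians() is used above to thin
# the high-rate CO record before plotting. Its contract, as relied on here, is just "parallel
# date/value lists in, 5-minute median bins out". A pandas-based sketch of that assumed contract
# (the real summit_core implementation may not use pandas at all):
import pandas as pd


def five_minute_medians_sketch(dates, values):
    """Collapse a high-rate series to 5-minute medians; returns (dates, medians) as parallel lists."""
    series = pd.Series(values, index=pd.DatetimeIndex(dates)).resample('5min').median().dropna()
    return list(series.index.to_pydatetime()), series.tolist()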
async def plot_new_data(logger): """ If newer data exists, plot it going back one week from the day of the plotting. :param logger: logger, to log events to :return: Boolean, True if it ran without error and created data, False if not """ try: from pathlib import Path from summit_core import core_dir, Config from summit_core import methane_dir as rundir from summit_core import connect_to_db, create_daily_ticks, TempDir, Plot, add_or_ignore_plot from summit_methane import Sample, GcRun, Base, plottable_sample, summit_methane_plot remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots' except ImportError as e: logger.error('ImportError occurred in plot_new_data()') send_processor_email(PROC, exception=e) return False try: engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir) Base.metadata.create_all(engine) except Exception as e: logger.error( f'Exception {e.args} prevented connection to the database in plot_new_data()' ) send_processor_email(PROC, exception=e) return False try: core_engine, core_session = connect_to_db( 'sqlite:///summit_core.sqlite', core_dir) Plot.__table__.create(core_engine, checkfirst=True) Config.__table__.create(core_engine, checkfirst=True) ch4_config = core_session.query(Config).filter( Config.processor == PROC).one_or_none() if not ch4_config: ch4_config = Config( processor=PROC ) # use all default values except processor on init core_session.add(ch4_config) core_session.commit() except Exception as e: logger.error( f'Error {e.args} prevented connecting to the core database in plot_new_data()' ) send_processor_email(PROC, exception=e) return False try: logger.info('Running plot_new_data()') engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir) runs_with_medians = (session.query(GcRun).filter( GcRun.median != None).filter(GcRun.standard_rsd < .02).filter( GcRun.rsd < .02).order_by(GcRun.date).all()) last_ambient_date = runs_with_medians[-1].date # get date after filtering, ie don't plot if there's no new data getting plotted date_limits, major_ticks, minor_ticks = create_daily_ticks( ch4_config.days_to_plot) if last_ambient_date > ch4_config.last_data_date: ambient_dates = [run.date for run in runs_with_medians] ambient_mrs = [run.median for run in runs_with_medians] with TempDir(rundir / 'plots'): name = summit_methane_plot( None, {'Summit Methane [GC]': [ambient_dates, ambient_mrs]}, limits={ 'bottom': 1850, 'top': 2050, 'right': date_limits.get('right', None), 'left': date_limits.get('left', None) }, major_ticks=major_ticks, minor_ticks=minor_ticks) methane_plot = Plot(rundir / 'plots' / name, remotedir, True) # stage plots to be uploaded add_or_ignore_plot(methane_plot, core_session) ch4_config.last_data_date = last_ambient_date core_session.merge(ch4_config) logger.info('New data plots created.') else: logger.info('No new data found to be plotted.') session.close() engine.dispose() core_session.commit() core_session.close() core_engine.dispose() return True except Exception as e: logger.error(f'Exception {e.args} occurred in plot_new_data()') send_processor_email(PROC, exception=e) core_session.close() core_engine.dispose() session.close() engine.dispose() return False
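# Illustrative sketch (not part of the processor code): the plotting routines call
# create_daily_ticks() from summit_core for their axis limits and tick positions. Based only on how
# its return values are used (a dict with 'left'/'right' limits plus major/minor tick lists, and an
# optional minors_per_day keyword), an assumed implementation looks roughly like this; the window
# anchoring and defaults are guesses, not the actual summit_core code.
from datetime import datetime, timedelta


def create_daily_ticks_sketch(days_to_plot, minors_per_day=4):
    """Return ({'left': ..., 'right': ...}, major_ticks, minor_ticks) spanning the last `days_to_plot` days."""
    right = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
    left = right - timedelta(days=days_to_plot)

    majors = [left + timedelta(days=d) for d in range(days_to_plot + 1)]
    minors = [left + timedelta(days=d / minors_per_day) for d in range(days_to_plot * minors_per_day + 1)]

    return {'left': left, 'right': right}, majors, minors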
async def plot_dailies(logger):
    """
    Loads dailies for the last 3 weeks and plots with major ticks every three days and minor ticks every day.
    Plots are registered with the core database so they're uploaded to the Taylor drive.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    try:
        from pathlib import Path
        import datetime as dt
        from summit_core import connect_to_db, core_dir, TempDir, Config, Plot, add_or_ignore_plot, create_daily_ticks

        plotdir = core_dir / 'plots/daily'
        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/protected/plots'

        try:
            os.chdir(plotdir)
        except FileNotFoundError:
            os.mkdir(plotdir)
    except ImportError as e:
        logger.error('ImportError occurred in plot_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_daily.sqlite', core_dir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the database in plot_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        daily_config = core_session.query(Config).filter(Config.processor == PROC).one_or_none()

        if not daily_config:
            daily_config = Config(processor=PROC, days_to_plot=21)  # use default values except processor and days_to_plot on init
            core_session.add(daily_config)
            core_session.commit()
    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the core database in plot_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running plot_dailies()')

        # set a static date to retrieve data from at the beginning of the plot cycle
        date_ago = datetime.now() - dt.timedelta(days=daily_config.days_to_plot + 1)

        date_limits, major_ticks, minor_ticks = create_daily_ticks(daily_config.days_to_plot, minors_per_day=1)
        major_ticks = [t for ind, t in enumerate(major_ticks) if ind % 3 == 0]  # use every third daily tick

        dailies = session.query(Daily).filter(Daily.date >= date_ago).order_by(Daily.date).all()

        dailydict = {}
        for param in daily_parameters:
            dailydict[param] = [getattr(d, param) for d in dailies]

        with TempDir(plotdir):
            # plot GC heated-zone temperatures
            name = summit_daily_plot(dailydict.get('date'),
                                     {'Ads Xfer A': [None, dailydict.get('ads_xfer_a')],
                                      'Ads Xfer B': [None, dailydict.get('ads_xfer_b')],
                                      'Valves Temp': [None, dailydict.get('valves_temp')],
                                      'GC Xfer Temp': [None, dailydict.get('gc_xfer_temp')],
                                      'Catalyst': [None, dailydict.get('catalyst')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': 0,
                                             'top': 475},
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            hot_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(hot_plot, core_session)

            # plot room and standard temperatures
            name = summit_daily_plot(dailydict.get('date'),
                                     {'CJ1 Temp': [None, dailydict.get('cj1')],
                                      'CJ2 Temp': [None, dailydict.get('cj2')],
                                      'Standard Temp': [None, dailydict.get('std_temp')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': 10,
                                             'top': 50},
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            room_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(room_plot, core_session)

            # plot gas pressures
            name = summit_daily_plot(dailydict.get('date'),
                                     {'H2 Gen Pressure': [None, dailydict.get('h2_gen_p')],
                                      'Line Pressure': [None, dailydict.get('line_p')],
                                      'Zero Pressure': [None, dailydict.get('zero_p')],
                                      'FID Pressure': [None, dailydict.get('fid_p')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': 0,
                                             'top': 75},
                                     y_label_str='Pressure (PSI)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            pressure_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(pressure_plot, core_session)

            # plot inlet temperature
            name = summit_daily_plot(dailydict.get('date'),
                                     {'Inlet Short Temp': [None, dailydict.get('inlet_short')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': 0,
                                             'top': 60},
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            inlet_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(inlet_plot, core_session)

            # plot supply voltages
            name = summit_daily_plot(dailydict.get('date'),
                                     {'Battery V': [None, dailydict.get('battv')],
                                      '12Va': [None, dailydict.get('v12a')],
                                      '15Va': [None, dailydict.get('v15a')],
                                      '15Vb': [None, dailydict.get('v15b')],
                                      '24V': [None, dailydict.get('v24')],
                                      '5Va': [None, dailydict.get('v5a')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': 0,
                                             'top': 30},
                                     y_label_str='Voltage (V)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            voltage_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(voltage_plot, core_session)

            # plot mass flow controller flows
            name = summit_daily_plot(dailydict.get('date'),
                                     {'MFC1': [None, dailydict.get('mfc1')],
                                      'MFC2': [None, dailydict.get('mfc2')],
                                      'MFC3a': [None, dailydict.get('mfc3a')],
                                      'MFC3b': [None, dailydict.get('mfc3b')],
                                      'MFC4': [None, dailydict.get('mfc4')],
                                      'MFC5': [None, dailydict.get('mfc5')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': -1,
                                             'top': 3.5},
                                     y_label_str='Flow (mL/min)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            flow_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(flow_plot, core_session)

        core_session.commit()
        core_session.close()
        core_engine.dispose()
        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_dailies()')
        send_processor_email(PROC, exception=e)
        session.close()
        engine.dispose()
        return False
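# Illustrative sketch (not part of the processor code): TempDir (from summit_core) is used above as
# a context manager so that plots, which the plotting helpers write to the current working
# directory, land in the intended plot directory. The class below is a hypothetical stand-in that
# shows only that assumed chdir-and-restore behavior.
import os


class TempDirSketch:
    """Change into `path` for the duration of a with-block, then restore the previous working directory."""

    def __init__(self, path):
        self.path = path
        self.previous = None

    def __enter__(self):
        self.previous = os.getcwd()
        os.chdir(self.path)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        os.chdir(self.previous)
        return False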
from datetime import datetime from summit_core import connect_to_db, voc_dir from summit_voc import LogFile engine, session = connect_to_db('sqlite:///summit_voc.sqlite', voc_dir) logfiles = session.query(LogFile).filter( LogFile.date > datetime(2019, 3, 8)).all() paramBounds = ({ 'samplepressure1': (1.5, 2.65), 'samplepressure2': (6.5, 10), 'GCHeadP': (5, 7.75), 'GCHeadP1': (9, 13), 'chamber_temp_start': (18, 30), 'WT_primary_temp_start': (-35, -24), 'WT_secondary_temp_start': (18, 35), 'ads_secondary_temp_start': (18, 35), 'ads_primary_temp_start': (-35, -24), 'chamber_temp_end': (18, 30), 'WT_primary_temp_end': (-35, -24), 'WT_secondary_temp_end': (15, 35), 'ads_secondary_temp_end': (15, 35), 'ads_primary_temp_end': (-35, -24), 'traptempFH': (-35, 0), 'GCstarttemp': (35, 45), 'traptempinject_end': (285, 310), 'traptempbakeout_end': (310, 335), 'WT_primary_hottemp': (75, 85), 'WT_secondary_hottemp': (20, 35), 'GCoventemp': (190, 210)