async def check_existing_errors(logger, active_errors=None):
    """
    Check all active errors and drop any that have been resolved.

    Only errors with the reason 'no new data' can currently be auto-resolved;
    any other error type is left in place and noted in the log.

    :param logger: logging logger to log to
    :param active_errors: list of Error objects to check; may be None or empty
    :return: list of still-active Error objects; on an unexpected exception the
        input list is returned unmodified
    """
    logger.info('Running check_existing_errors()')
    try:
        if not active_errors:
            logger.info(
                'Check_existing_errors() called without any active errors.')
            # BUGFIX: return an empty list instead of False so callers that
            # re-assign the result always get a list back (both are falsy)
            return []

        for ind, err in enumerate(active_errors):
            if err.reason == 'no new data':
                if err.is_resolved(
                        processor=err.email_template.processor,
                        last_data_time=err.email_template.last_data_time,
                        logger=logger):
                    active_errors[ind] = None  # mark resolved errors for removal
                # is_resolved() handles logging in both cases
            else:
                logger.info('Error aside from "no new data" was found.')

        # drop entries that were marked resolved above
        active_errors = [err for err in active_errors if err]
        return active_errors
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_existing_errors()')
        send_processor_email(PROC, exception=e)
        return active_errors
async def check_send_plots(logger):
    """
    Look through all plots staged to be uploaded and remove them if successfully uploaded.

    :param logger: logging logger to log to
    :return: boolean, True if ran without errors
    """
    try:
        from summit_errors import send_processor_email
    except ImportError:
        logger.error('ImportError occurred in check_send_plots()')
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in check_send_plots()'
        )
        send_processor_email('Core', exception=e)
        return False

    try:
        plots_to_upload = session.query(Plot).filter(Plot.staged == True)

        # upload per remote directory so each sftp call targets one destination
        remote_dirs = {p.remote_path for p in plots_to_upload.all()}

        for remote_dir in remote_dirs:
            plot_set = plots_to_upload.filter(
                Plot.remote_path == remote_dir).all()

            if plot_set:
                paths_to_upload = [p.path for p in plot_set]
                successes = await send_files_sftp(paths_to_upload, remote_dir)

                # BUGFIX: zip against plot_set (the plots actually uploaded in
                # this batch), not the whole staged query; otherwise successes
                # were paired with the wrong plots
                for plot, success in zip(plot_set, successes):
                    if success:
                        logger.info(f'Plot {plot.name} uploaded to website.')
                        session.delete(plot)  # uploaded plots are removed from staging
                    else:
                        logger.warning(f'Plot {plot.name} failed to upload.')

        session.commit()
        session.close()
        engine.dispose()
        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_send_plots().')
        send_processor_email('Core', exception=e)
        session.close()
        engine.dispose()
        return False
async def main():
    """
    Configure a logger, then loop forever: check each processor for stale data
    and prune any active errors that have since been resolved.

    :return: False only if the logger could not be configured; otherwise loops forever
    """
    try:
        from summit_core import configure_logger
        from summit_core import error_dir as rundir
        logger = configure_logger(rundir, __name__)
    except Exception as e:
        print('Error logger could not be configured')
        send_processor_email(PROC, exception=e)
        return False

    errors = []
    while True:
        errors = await asyncio.create_task(
            check_for_new_data(logger, active_errors=errors))
        # BUGFIX: keep the pruned list returned by check_existing_errors();
        # previously the return value was discarded, so resolved errors were
        # never removed from the active list
        errors = await asyncio.create_task(
            check_existing_errors(logger, active_errors=errors))
        # BUGFIX: sleep (~20 minutes, in short increments) between passes
        # instead of spinning in a tight loop
        for _ in range(40):
            await asyncio.sleep(30)
async def main():
    """
    Entry point for the Picarro processor: load any new data, then search for
    calibration events and build MasterCals before plotting the new data.

    :return: True on success, False on failure, None if the logger could not be configured
    """
    try:
        from summit_core import picarro_dir as rundir
        from summit_core import configure_logger
        logger = configure_logger(rundir, __name__)
    except Exception as e:
        print(f'Error {e.args} prevented logger configuration.')
        send_processor_email(PROC, exception=e)
        return

    try:
        loaded_new = await asyncio.create_task(check_load_new_data(logger))
        # MasterCals are only worth building when new data produced new cal events
        if loaded_new and await asyncio.create_task(find_cal_events(logger)):
            await asyncio.create_task(create_mastercals(logger))

        # plots are refreshed every pass, regardless of new cal events
        await asyncio.create_task(plot_new_data(logger))
        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in Picarro main()')
        send_processor_email(PROC, exception=e)
        return False
async def check_for_new_data(logger, active_errors=None):
    """
    Compare each processor's last data date against its allowed staleness limit
    and open a 'no new data' Error for any processor that has gone quiet.

    :param logger: logging logger to log to
    :param active_errors: list of currently-active Error objects; None becomes []
    :return: the (possibly extended) list of active errors, or False on exception
    """
    reason = 'no new data'
    if not active_errors:
        active_errors = []

    try:
        logger.info('Running check_for_new_data()')

        # per-processor staleness limits (dict preserves insertion order)
        time_limits = {
            'voc': dt.timedelta(hours=8),
            'methane': dt.timedelta(hours=3),
            'picarro': dt.timedelta(hours=2),
        }

        for proc, time_limit in time_limits.items():
            last_data_time = get_last_processor_date(proc, logger)

            if not last_data_time:
                logger.warning(f'No data available to compare for {proc}.')
                continue

            if datetime.now() - last_data_time <= time_limit:
                continue  # data is recent enough; nothing to report

            if matching_error(active_errors, reason, proc):
                logger.error(
                    f'Error for {reason} for the {proc} processor is already active and was ignored.'
                )
            else:
                active_errors.append(
                    Error(reason, new_data_found,
                          NewDataEmail(sender, proc, last_data_time)))

        return active_errors
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_for_new_data()')
        send_processor_email(PROC, exception=e)
        return False
async def match_runs_to_lines(logger):
    """
    Match unpaired PaLines to unpaired GcRuns where possible.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db
        from summit_methane import GcRun, PaLine, match_lines_to_runs, Base
    except ImportError as e:
        send_processor_email(PROC, exception=e)
        # fixed 'occured' typo
        logger.error('ImportError occurred in match_runs_to_lines()')
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        # BUGFIX: message previously named check_load_pa_log()
        logger.error(
            f'Exception {e.args} prevented connection to the database in match_runs_to_lines()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        # BUGFIX: log previously said match_runs_to_peaks()
        logger.info('Running match_runs_to_lines()')
        # BUGFIX: removed a second connect_to_db() call that leaked the first
        # session/engine pair

        unmatched_lines = session.query(PaLine).filter(PaLine.run == None).all()
        unmatched_runs = session.query(GcRun).filter(GcRun.pa_line_id == None).all()

        lines, runs, count = match_lines_to_runs(unmatched_lines, unmatched_runs)

        session.commit()
        session.close()
        engine.dispose()

        if count:
            logger.info(f'{count} GcRuns matched with PaLines.')
            return True
        else:
            logger.info('No new GcRun-PaLine pairs matched.')
            return False
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in match_runs_to_lines()')
        send_processor_email(PROC, exception=e)
        return False
async def main():
    """
    Configure a logger and run processes in order, only proceeding if new data
    is created that warrants running the next processes.

    :return: Boolean, True if successful
    """
    try:
        from summit_core import methane_dir as rundir
        from summit_core import configure_logger
        logger = configure_logger(rundir, __name__)
    except Exception as e:
        print(f'Error {e.args} prevented logger configuration.')
        send_processor_email(PROC, exception=e)
        return

    try:
        new_pas = await asyncio.create_task(check_load_pa_log(logger))
        new_logs = await asyncio.create_task(check_load_run_logs(logger))

        if new_pas or new_logs:
            # each stage only runs when the previous one produced new matches
            matched_lines = await asyncio.create_task(match_runs_to_lines(logger))
            if matched_lines:
                matched_peaks = await asyncio.create_task(match_peaks_to_samples(logger))
                if matched_peaks:
                    await asyncio.create_task(add_one_standard(logger))
                    if await asyncio.create_task(quantify_samples(logger)):
                        await asyncio.create_task(plot_new_data(logger))
                    # NOTE(review): sheet update placed after the quantify branch,
                    # at the matched-peaks level — confirm against original indentation
                    await asyncio.create_task(update_excel_sheet(logger))

        return True
    except Exception as e:
        logger.critical(
            f'Exception {e.args} caused a complete failure of the CH4 processing.'
        )
        send_processor_email(PROC, exception=e)
        return False
async def add_one_standard(logger):
    """
    Add a single standard (the current working one), so that quantifications are possible.
    VERY TEMPORARY.

    :param logger: logger, to log events to
    :return: Boolean, True if successful
    """
    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db
        from summit_methane import Standard, Base
    except ImportError as e:
        logger.error('ImportError occurred in add_one_standard()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        # BUGFIX: message previously named check_load_pa_log()
        logger.error(
            f'Exception {e.args} prevented connection to the database in add_one_standard()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        # BUGFIX: removed a redundant second connect_to_db() call that leaked
        # the first session/engine pair
        current_standard_dates = [
            S.date_st for S in session.query(Standard).all()
        ]

        # the one working standard, valid for all of 2019
        my_only_standard = Standard('ws_2019', 2067.16, datetime(2019, 1, 1),
                                    datetime(2019, 12, 31))

        # only merge it if a standard with this start date isn't already present
        if my_only_standard.date_st not in current_standard_dates:
            session.merge(my_only_standard)
            session.commit()

        session.close()
        engine.dispose()
        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in add_one_standard()')
        send_processor_email(PROC, exception=e)
        return False
async def check_load_new_data(logger):
    """
    Checks for new files, checks length of old ones for updates, and
    processes/commits new data to the database.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """
    logger.info('Running check_load_new_data()')

    try:
        from summit_core import picarro_logs_path as data_path
        from summit_core import picarro_dir as rundir
        from summit_core import connect_to_db, get_all_data_files, check_filesize
        from summit_picarro import Base, DataFile, Datum
        from sqlalchemy.orm.exc import MultipleResultsFound
        from summit_errors import EmailTemplate, sender, processor_email_list
        # BUGFIX: import EmptyDataError explicitly; it was referenced in an
        # except clause below without ever being imported here
        from pandas.errors import ParserError, EmptyDataError
    except ImportError as e:
        logger.error('ImportError occurred in check_load_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} caused database connection to fail in check_load_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        db_files = session.query(DataFile)
        db_filenames = [d.name for d in db_files.all()]

        all_available_files = get_all_data_files(data_path, '.dat')

        # start with any files already in the db that never finished processing
        files_to_process = session.query(DataFile).filter(
            DataFile.processed == False).all()

        for file in all_available_files:
            try:
                db_match = db_files.filter(
                    DataFile._name == file.name).one_or_none()
            except MultipleResultsFound:
                logger.warning(
                    f'Multiple results found for file {file.name}. The first was used.'
                )
                db_match = db_files.filter(DataFile._name == file.name).first()

            if file.name not in db_filenames:
                files_to_process.append(DataFile(file))
            elif check_filesize(file) > db_match.size:
                # if a matching file was found and it's now bigger, append for processing
                # BUGFIX: fixed 'procesing' typo in the log message
                logger.info(
                    f'File {file.name} had more data and was added for processing.'
                )
                files_to_process.append(db_match)

        if not files_to_process:
            logger.warning('No new data was found.')
            return False

        for ind, file in enumerate(files_to_process):
            # merge files and return the merged object to overwrite the old
            files_to_process[ind] = session.merge(file)
            logger.info(f'File {file.name} added for processing.')
        session.commit()

        for file in files_to_process:
            try:
                df = pd.read_csv(file.path, delim_whitespace=True)
            except EmptyDataError as e:
                logger.error(
                    f'Exception {e.args} occurred while reading {file.name}')
                send_processor_email(PROC, exception=e)
                continue
            except ParserError as e:
                logger.error(
                    f'Pandas ParserError occurred while reading {file.name}.')
                from summit_errors import send_processor_warning
                try:
                    # re-parse, skipping any lines pandas cannot read
                    df = pd.read_csv(file.path, delim_whitespace=True,
                                     error_bad_lines=False)
                    send_processor_warning(PROC, 'Dataframe', (
                        f'The Picarro Processor failed to read file {file.name} '
                        + 'It was re-parsed, skipping unreadable lines, but should be'
                        + ' investigated.'))
                except Exception as e:
                    logger.error(
                        f'Exception {e.args} occurred in check_load_new_data() while reading a file.'
                        + f' The file was {file.name}')
                    send_processor_email(PROC, exception=e)
                    continue
            except Exception as e:
                logger.error(
                    f'Exception {e.args} occurred in check_load_new_data() while reading a file.'
                    + f' The file was {file.name}')
                send_processor_email(PROC, exception=e)
                continue

            # drop any rows containing nulls, and warn if any were cut
            original_length = len(df)
            df.dropna(axis=0, how='any', inplace=True)
            new_length = len(df)
            diff = original_length - new_length

            if diff:
                logger.warning(
                    f'Dataframe contained {diff} null values in {file.name}.')
                from summit_errors import send_processor_warning
                send_processor_warning(PROC, 'DataFrame', (
                    f'The Picarro Processor cut {diff} lines from a dataframe after reading it.\n'
                    + f'{file.name} should be investigated and cleaned if necessary.'
                ))

            # CO2 stays in ppm
            df['CO_sync'] *= 1000  # convert CO to ppb
            df['CH4_sync'] *= 1000  # convert CH4 to ppb
            df['CH4_dry_sync'] *= 1000

            df_list = df.to_dict('records')  # convert to list of dicts

            data_list = [Datum(line) for line in df_list]

            if data_list:
                data_dates = [d.date for d in data_list]
                # use a set for O(1) membership checks instead of a list
                dates_already_in_db = {
                    d.date
                    for d in session.query(Datum.date).filter(
                        Datum.date.in_(data_dates)).all()
                }

                for d in data_list:
                    if d.date not in dates_already_in_db:
                        d.file_id = file.id  # relate Datum to the file it originated in
                        session.add(d)
            else:
                logger.info(f'No new data created from file {file.name}.')

            file.processed = True
            file.size = check_filesize(file.path)
            logger.info(f'All data in file {file.name} processed.')
            session.commit()

        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_load_new_data().')
        send_processor_email(PROC, exception=e)
        return False
async def create_mastercals(logger):
    """
    Searches all un-committed CalEvents, looking for (high, middle, low) sets that
    can then have a curve and other stats calculated. It will report them as DEBUG
    items in the log.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """
    logger.info('Running create_mastercals()')

    try:
        from summit_core import picarro_dir as rundir
        from summit_core import connect_to_db
        from summit_picarro import MasterCal, CalEvent, match_cals_by_min
        # BUGFIX: removed unused matplotlib/seaborn/numpy imports that were
        # loaded (with side effects) on every call
    except Exception as e:
        # BUGFIX: fixed 'occured' typo in the log message
        logger.error('ImportError occurred in create_mastercals()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite', rundir)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to database in create_mastercals()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        # Get cals by standard, but only if they're not in another MasterCal already
        lowcals = (session.query(CalEvent).filter(
            CalEvent.mastercal_id == None,
            CalEvent.standard_used == 'low_std').all())
        highcals = (session.query(CalEvent).filter(
            CalEvent.mastercal_id == None,
            CalEvent.standard_used == 'high_std').all())
        midcals = (session.query(CalEvent).filter(
            CalEvent.mastercal_id == None,
            CalEvent.standard_used == 'mid_std').all())

        # a MasterCal needs a low cal with a high and a mid cal within 5 minutes
        mastercals = []
        for lowcal in lowcals:
            matching_high = match_cals_by_min(lowcal, highcals, minutes=5)
            if matching_high:
                matching_mid = match_cals_by_min(matching_high, midcals, minutes=5)
                if matching_mid:
                    mastercals.append(
                        MasterCal([lowcal, matching_high, matching_mid]))

        if mastercals:
            for mc in mastercals:
                # calculate curve from low - high point, and check middle distance
                mc.create_curve()
                session.add(mc)
                logger.info(f'MasterCal for {mc.subcals[0].date} created.')

            session.commit()
            # close the session/engine (previously leaked on every call)
            session.close()
            engine.dispose()
            return True
        else:
            logger.info('No MasterCals were created.')
            session.close()
            engine.dispose()
            return False
    except Exception as e:
        # BUGFIX: fixed 'occured' typo in the log message
        logger.error(f'Exception {e.args} occurred in create_mastercals()')
        send_processor_email(PROC, exception=e)
        return False
async def find_cal_events(logger):
    """
    Searches the existing data for unused calibration data and creates/commits
    CalEvents if possible.

    Data is pulled per multi-position-valve (MPV) position 2/3/4, each mapping
    to one standard via mpv_converter; contiguous stretches of cal data are then
    split into events at the boundaries returned by find_cal_indices().

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """
    logger.info('Running find_cal_events()')
    try:
        from summit_core import connect_to_db
        from summit_core import picarro_dir as rundir
        from summit_picarro import Base, Datum, CalEvent, mpv_converter, find_cal_indices
        from summit_picarro import log_event_quantification, filter_postcal_data
    except Exception as e:
        logger.error('ImportError occured in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        standard_data = {}
        for MPV in [2, 3, 4]:
            # get only data for this switching valve position, and not already
            # in any calibration event
            mpv_data = pd.DataFrame(
                session.query(Datum.id, Datum.date).filter(
                    Datum.mpv_position == MPV).filter(
                        Datum.cal_id == None).all())

            if not len(mpv_data):
                logger.info(
                    f'No new calibration events found for standard {mpv_converter[MPV]}'
                )
                continue

            mpv_data['date'] = pd.to_datetime(mpv_data['date'])
            # use mpv_converter dict to get standard information; keep the
            # data sorted by date so index-based event splits are chronological
            standard_data[mpv_converter[MPV]] = mpv_data.sort_values(
                by=['date']).reset_index(drop=True)

        for standard, data in standard_data.items():
            # indices mark the boundaries between separate calibration events
            indices = find_cal_indices(data['date'])

            cal_events = []

            if not len(indices) and len(data):
                # if there's not provided indices, but there's still calibration
                # data, create the one event from all of it
                event_data = session.query(Datum).filter(
                    Datum.id.in_(data['id'])).all()
                cal_events.append(CalEvent(event_data, standard))
            elif not len(indices):
                # if there's no provided indices and no data, nothing to do
                logger.info(
                    f'No new cal events were found for {standard} standard.')
                continue

            prev_ind = 0
            for num, ind in enumerate(indices):
                # get all data within this event (rows between the previous
                # boundary and this one)
                event_data = session.query(Datum).filter(
                    Datum.id.in_(data['id'].iloc[prev_ind:ind])).all()
                cal_events.append(CalEvent(event_data, standard))

                if num == (len(indices) - 1):
                    # if it's the last index, get all ahead of it as the last event
                    event_data = session.query(Datum).filter(
                        Datum.id.in_(data['id'].iloc[ind:])).all()
                    cal_events.append(CalEvent(event_data, standard))

                prev_ind = ind

            for ev in cal_events:
                # flag the following minute as questionable data (inst_status = 999)
                filter_postcal_data(ev, session)

                if ev.date - ev.dates[0] < dt.timedelta(seconds=90):
                    logger.info(
                        f'CalEvent for date {ev.date} had a duration < 90s and was ignored.'
                    )
                    # give not-long-enough events standard type 'dump' so they're ignored
                    ev.standard_used = 'dump'
                    session.merge(ev)
                else:
                    for cpd in ['co', 'co2', 'ch4']:
                        # calculate results for all compounds going 21s back
                        ev.calc_result(cpd, 21)
                    session.merge(ev)
                    logger.info(f'CalEvent for date {ev.date} added.')
                    # show quantification info as DEBUG in log
                    log_event_quantification(logger, ev)

            session.commit()

        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False
async def move_log_files(logger):
    """
    Runs continuously and sleeps for 10 minutes at a time. Comb the directories
    for new data files and move any that are new or have been updated.

    This WILL NOT handle turning over a new year in the daily files well, as they
    have no year in the filename. I can't fix that.

    :param logger: logging logger to log to
    :return: boolean, True if ran without errors
    """
    while True:
        try:
            from summit_errors import send_processor_email, EmailTemplate, sender, processor_email_list
            from shutil import copy
            import datetime as dt
            import os
        except ImportError:
            logger.error('ImportError occurred in move_log_files()')
            return False

        try:
            engine, session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
            MovedFile.__table__.create(engine, checkfirst=True)
        except Exception as e:
            logger.error(
                f'Exception {e.args} prevented connection to the database in move_log_files()'
            )
            send_processor_email('Core', exception=e)
            return False

        def _copy_with_warning(file, data_path):
            """Copy file.path into data_path (overwriting); warn and return False on PermissionError."""
            try:
                copy(file.path, data_path)  # will overwrite
                return True
            except PermissionError:
                logger.error(
                    f'File {file.name} could not be moved due to a permissions error.'
                )
                from summit_errors import send_processor_warning
                # BUGFIX: fixed 'due a' and 'privelidges' defects in the warning text
                send_processor_warning(
                    PROC, 'PermissionError',
                    f'File {file.name} could not be moved due to a permissions error.\n'
                    + 'Copying/pasting the file, deleting the old one, and renaming '
                    + 'the file to its old name should allow it to be processed.\n'
                    + 'This will require admin privileges.')
                return False

        try:
            logger.info('Running move_log_files()')

            sync_paths = [
                methane_logs_sync, voc_logs_sync, daily_logs_sync, picarro_logs_sync
            ]
            data_paths = [
                methane_logs_path, voc_logs_path, daily_logs_path, picarro_logs_path
            ]
            data_types = ['methane', 'voc', 'daily', 'picarro']
            file_types = ['.txt', '.txt', '.txt', '.dat']

            # change the name of the daily files before reading them in (implemented: 2/14/2020)
            # BUGFIX: hoisted out of the per-type loop below; it only touches
            # daily_logs_sync and was previously re-run identically four times
            for d in get_all_data_files(daily_logs_sync, '.txt'):
                if check_path_date(d).year == dt.datetime.now().year and "2020" not in str(d):
                    name, extension = os.path.splitext(d)
                    d.rename(name + '_' + str(dt.datetime.now().year) + extension)

            for sync_path, type_, data_path, file_type in zip(
                    sync_paths, data_types, data_paths, file_types):

                sync_files = [
                    MovedFile(path, type_, 'sync', check_filesize(path))
                    for path in get_all_data_files(sync_path, file_type)
                ]

                data_files = (session.query(MovedFile).filter(
                    MovedFile.location == 'data').filter(
                        MovedFile.type == type_).all())
                moved_data_files = [d.name for d in data_files]

                for file in sync_files:
                    if file.name not in moved_data_files:
                        # brand-new file: copy it over and record it in the data dir
                        if not _copy_with_warning(file, data_path):
                            continue
                        file.path = data_path / file.name
                        file.location = 'data'
                        session.merge(file)
                        logger.info(f'File {file.name} moved to data directory.')
                    else:
                        matched_file = search_for_attr_value(
                            data_files, 'name', file.name)

                        if file.size > matched_file.size:
                            # file grew since last move: re-copy and update its size
                            if not _copy_with_warning(file, data_path):
                                continue
                            matched_file.size = check_filesize(matched_file.path)
                            session.merge(matched_file)
                            logger.info(
                                f'File {matched_file.name} updated in data directory.'
                            )

            session.commit()
            session.close()
            engine.dispose()

            import gc
            gc.collect()

            # sleep ~10 minutes in short increments so cancellation stays responsive
            for i in range(20):
                await asyncio.sleep(30)
        except Exception as e:
            logger.error(f'Exception {e.args} occurred in move_log_files().')
            send_processor_email('Core', exception=e)
            session.close()
            engine.dispose()
            return False
errors = await asyncio.create_task( check_for_new_data(logger, active_errors=errors)) if errors: errors = await asyncio.create_task( check_existing_errors(logger, active_errors=errors)) print('Sleeping...') for i in range(40): await asyncio.sleep(30) if __name__ == '__main__': try: from summit_core import methane_dir as rundir from summit_core import configure_logger logger = configure_logger(rundir, __name__) except Exception as e: print(f'Error {e.args} prevented logger configuration.') send_processor_email('MAIN', exception=e) raise e loop = asyncio.get_event_loop() loop.create_task(move_log_files(logger)) loop.create_task(main(logger)) loop.run_forever()
async def check_load_run_logs(logger):
    """
    Read new log files and create new GcRun and Sample objects if possible.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    try:
        from summit_core import methane_logs_path
        from summit_core import methane_dir as rundir
        from summit_core import get_all_data_files, connect_to_db
        from summit_methane import Base, GcRun, Sample, read_log_file
    except ImportError as e:
        logger.error('ImportError occurred in check_load_run_logs()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        # BUGFIX: message previously named check_load_pa_log()
        logger.error(
            f'Exception {e.args} prevented connection to the database in check_load_run_logs()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running check_load_run_logs()')
        runs_in_db = session.query(GcRun).all()
        samples = session.query(Sample)
        sample_count = samples.count()  # baseline to report how many samples get added
        run_dates = [r.date for r in runs_in_db]

        files = get_all_data_files(methane_logs_path, '.txt')

        runs = []
        for file in files:
            runs.append(read_log_file(file))

        new_run_count = 0  # count runs added
        for run in runs:
            if run.date not in run_dates:
                session.add(run)
                logger.info(f'GcRun for {run.date} added.')
                new_run_count += 1

        if not new_run_count:
            logger.info('No new GcRuns added.')
        else:
            session.commit()
            new_sample_count = session.query(Sample).count() - sample_count
            logger.info(
                f'{new_run_count} GcRuns added, containing {new_sample_count} Samples.'
            )

            # each GcRun is expected to carry exactly ten Samples
            if new_run_count * 10 != new_sample_count:
                logger.warning(
                    'There were not ten Samples per GcRun as expected.')

        session.close()
        engine.dispose()
        return True
    except Exception as e:
        session.close()
        engine.dispose()
        # BUGFIX: message previously named check_load_pa_log()
        logger.error(f'Exception {e.args} occurred in check_load_run_logs()')
        send_processor_email(PROC, exception=e)
        return False
async def read_excel_sheet(logger):
    """
    Ensure the core database has a Config entry ('methane_sheet_read') for reading
    the methane integration sheet, and verify the sheet's filepath is configured.

    NOTE(review): despite importing pandas and sheet helpers, this function currently
    only manages the Config record — the actual sheet-reading logic appears to be
    unimplemented.

    :param logger: logger, to log events to
    :return: Boolean, True if successful
    """
    # BUGFIX: all log/error messages previously named update_excel_sheet();
    # this function is read_excel_sheet()
    logger.info('Running read_excel_sheet()')

    try:
        import pandas as pd
        from datetime import datetime
        from summit_core import methane_dir as rundir
        from summit_errors import send_processor_warning
        from summit_methane import GcRun, Base, add_formulas_and_format_sheet
        from summit_core import Config, connect_to_db, append_df_to_excel
        from summit_core import methane_dir, core_dir, data_file_paths

        methane_sheet = data_file_paths.get('methane_sheet', None)

        if not methane_sheet:
            logger.error(
                'Filepath for the methane integration sheet could not be retrieved.'
            )
            send_processor_warning(
                PROC, 'Filepath Error',
                '''The methane integration sheet filepath could not be retrieved. It should be listed as
                "methane_sheet" in file_locations.json in the core folder.''')
            return False
    except ImportError as e:
        logger.error('ImportError occurred in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in read_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        methane_sheet_read_config = (core_session.query(Config).filter(
            Config.processor == 'methane_sheet_read').one_or_none())

        if not methane_sheet_read_config:
            # use all default values except processor on init
            methane_sheet_read_config = Config(processor='methane_sheet_read')
            core_session.add(methane_sheet_read_config)
            core_session.commit()
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in read_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_session.merge(methane_sheet_read_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True
    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False
async def check_load_dailies(logger):
    """
    Find new or grown daily log files, read their entries, and commit any new
    per-date entries to the daily database.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    try:
        from summit_core import connect_to_db, get_all_data_files, core_dir, daily_logs_path, search_for_attr_value
    except ImportError as e:
        logger.error(f'ImportError occurred in check_load_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_daily.sqlite', core_dir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the database in check_load_dailies()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running check_load_dailies()')

        known_files = session.query(DailyFile).all()
        found_files = [
            DailyFile(path)
            for path in get_all_data_files(daily_logs_path, '.txt')
        ]

        # queue brand-new files, plus any known files that have grown on disk
        files_to_read = []
        for found in found_files:
            known = search_for_attr_value(known_files, 'path', found.path)

            if not known:
                files_to_read.append(found)
                logger.info(f'File {found.name} added for processing.')
            elif found.size > known.size:
                logger.info(
                    f'File {known.name} added to process additional data.')
                files_to_read.append(known)

        if files_to_read:
            for file in files_to_read:
                dailies = read_daily_file(file.path)
                # only append entries whose dates aren't already attached to the file
                existing_dates = [d.date for d in file.entries]
                file.entries.extend(
                    d for d in dailies if d.date not in existing_dates)
                file.size = file.path.stat().st_size
                session.merge(file)
            session.commit()

        session.close()
        engine.dispose()
        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_load_dailies()')
        send_processor_email(PROC, exception=e)
        session.close()
        engine.dispose()
        return False
async def quantify_samples(logger):
    """
    On a per-run basis, use std1 to calc samples 1-5 (~3) and std2 to calculate
    samples 6-10 (~8). Output warnings if only one standard in a sample is valid.

    :param logger: logger, to log events to
    :return: Boolean, True if successful
    """
    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, search_for_attr_value
        from summit_methane import Standard, GcRun, Base
        from summit_methane import calc_ch4_mr, valid_sample
    except Exception as e:
        # BUGFIX: fixed 'qunatify_samples' typo in the log message
        logger.error('ImportError occurred in quantify_samples()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        # BUGFIX: message previously named check_load_pa_log()
        logger.error(
            f'Exception {e.args} prevented connection to the database in quantify_samples()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running quantify_samples()')
        unquantified_runs = session.query(GcRun).filter(
            GcRun.median == None).all()

        ct = 0  # count of runs quantified
        for run in unquantified_runs:
            # TODO: Move the majority of this to class methods for GcRuns; will make editing integrations WAY easier
            samples = run.samples
            standard = (session.query(Standard).filter(
                run.date >= Standard.date_st,
                run.date < Standard.date_en).first()
            )  # TODO; Set unique constraints on standards, revert to one_or_none()

            if standard is not None:
                ambients = [
                    sample for sample in samples
                    if (sample.sample_type == 3 and valid_sample(sample))
                ]
                standard1 = search_for_attr_value(samples, 'sample_num', 2)
                standard2 = search_for_attr_value(samples, 'sample_num', 7)

                if not ambients:
                    logger.warning(
                        f'No ambient samples were quantifiable in GcRun for {run.date}'
                    )
                    continue

                if (not valid_sample(standard1)) and (not valid_sample(standard2)):
                    logger.warning(
                        f'No valid standard samples found in GcRun for {run.date}.'
                    )
                    continue
                elif not valid_sample(standard1):
                    # use std2 for all ambient quantifications
                    logger.info(
                        f'Only one standard used for samples in GcRun for {run.date}'
                    )
                    for amb in ambients:
                        amb = calc_ch4_mr(amb, standard2, standard)
                elif not valid_sample(standard2):
                    # use std1 for all ambient quantifications
                    logger.info(
                        f'Only one standard used for samples in GcRun for {run.date}'
                    )
                    for amb in ambients:
                        amb = calc_ch4_mr(amb, standard1, standard)
                else:
                    # use std1 for ambients 0-4 and std2 for ambients 5-9
                    for amb in ambients:
                        if amb.sample_num < 5:
                            amb = calc_ch4_mr(amb, standard1, standard)
                        else:
                            amb = calc_ch4_mr(amb, standard2, standard)

                    # BUGFIX: only compute the standard RSD when BOTH standards
                    # are valid; computing it with an invalid standard could
                    # fail outright or persist a meaningless value
                    run.standard_rsd = (
                        s.stdev([standard1.peak.pa, standard2.peak.pa]) /
                        s.median([standard1.peak.pa, standard2.peak.pa]))

                from summit_methane import plottable_sample
                # do basic filtering for calculating run medians
                all_run_mrs = [
                    amb.peak.mr for amb in ambients if plottable_sample(amb)
                ]

                if all_run_mrs:
                    run.median = s.median(all_run_mrs)
                    if len(all_run_mrs) > 1:
                        run.rsd = s.stdev(all_run_mrs) / run.median

                # merge only the run, it contains and cascades samples,
                # palines and peaks that were changed
                session.merge(run)
                ct += 1
            else:
                logger.warning(
                    f'No standard value found for GcRun at {run.date}.')

        session.commit()

        if ct:
            logger.info(f'{ct} GcRuns were successfully quantified.')
            session.close()
            engine.dispose()
            return True
        else:
            logger.info('No GcRuns quantified.')
            session.close()
            engine.dispose()
            return False
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in quantify_samples()')
        send_processor_email(PROC, exception=e)
        return False
async def match_peaks_to_samples(logger):
    """
    Attach detected peaks to their Samples.

    All detected peaks in a run are attached to NmhcLines, but are not linked
    to Samples until they've passed certain criteria: a peak must fall within
    the expected retention-time window for the sample's position, and the
    largest such peak is chosen.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, split_into_sets_of_n
        from summit_methane import Peak, Sample, GcRun, Base, sample_rts
        from operator import attrgetter
        import datetime as dt
    except ImportError as e:
        logger.error('ImportError occurred in match_peaks_to_samples()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in match_peaks_to_samples()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running match_peaks_to_samples()')

        # samples assigned to a run but without a matched peak yet
        unmatched_samples = session.query(Sample).filter(
            Sample.peak_id == None, Sample.run_id != None).all()

        whole_set = list({s.run_id for s in unmatched_samples})

        # SQLite can't take in clauses with > 1000 variables, so chunk to sets of 500
        if len(whole_set) > 500:
            run_id_chunks = split_into_sets_of_n(whole_set, 500)
        else:
            run_id_chunks = [whole_set]
            # TODO: Can be reduced to just splitting, this step is done automatically by split_into.

        # renamed loop variable: the original shadowed the builtin `set`
        runs_w_unmatched_samples = []
        for run_id_chunk in run_id_chunks:
            runs_w_unmatched_samples.extend(
                (session.query(GcRun).filter(GcRun.id.in_(run_id_chunk)).all())
            )  # create set of runs that require processing

        for run in runs_w_unmatched_samples:
            # loop through runs containing samples that haven't been matched with peaks
            samples = session.query(Sample).filter(
                Sample.run_id == run.id).all()
            peaks = session.query(Peak).filter(
                Peak.pa_line_id == run.pa_line_id)

            for sample in samples:
                sn = sample.sample_num
                # filter for peaks in this gc run between the expected
                # retention times given in sample_rts
                potential_peaks = peaks.filter(
                    Peak.rt.between(sample_rts[sn][0], sample_rts[sn][1])).all()

                if len(potential_peaks):
                    # currently, the criteria for "this is the real peak" is
                    # "this is the biggest peak"
                    peak = max(potential_peaks, key=attrgetter('pa'))
                    if peak:
                        sample.peak = peak
                        peak.name = 'CH4_' + str(sample.sample_num)
                        # date the sample by its peak's retention time within the run
                        sample.date = run.pa_line.date + dt.timedelta(
                            minutes=peak.rt - 1)
                        session.merge(sample)

        session.commit()
        session.close()
        engine.dispose()
        return True
    except Exception as e:
        # fixed typo: message previously read 'Excetion'
        logger.error(f'Exception {e.args} occurred in match_peaks_to_samples()')
        send_processor_email(PROC, exception=e)
        # release DB resources on failure too (previously leaked)
        session.close()
        engine.dispose()
        return False
async def check_load_pa_log(logger):
    """
    Read the PA log and create new PaLine objects if possible.

    The log is only re-read when its size has grown since the last pass
    (tracked in the core Config table), and parsing restarts a few lines
    before the saved start line to avoid missing samples.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    logger.info('Running check_load_pa_log()')

    try:
        from summit_core import methane_LOG_path as pa_filepath
        from summit_core import connect_to_db, check_filesize, core_dir, Config, split_into_sets_of_n
        from summit_methane import Base, read_pa_line, PaLine
        from summit_core import methane_dir as rundir
        from pathlib import Path
    except ImportError as e:
        logger.error('ImportError occurred in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in check_load_pa_log()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        ch4_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not ch4_config:
            ch4_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(ch4_config)
            core_session.commit()
    except Exception as e:
        # fixed copy-paste error: message previously referenced plot_new_data()
        logger.error(
            f'Error {e.args} prevented connecting to the core database in check_load_pa_log()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        if check_filesize(pa_filepath) <= ch4_config.filesize:
            logger.info('PA file did not change size.')
            # close connections on the early exit (previously leaked)
            session.close()
            engine.dispose()
            core_session.close()
            core_engine.dispose()
            return False

        ch4_config.filesize = check_filesize(pa_filepath)
        core_session.merge(ch4_config)
        core_session.commit()

        line_to_start = ch4_config.pa_startline - 3  # pad start to avoid missing samples
        if line_to_start < 0:
            line_to_start = 0

        pa_file_contents = pa_filepath.read_text().split('\n')[line_to_start:]

        # advance the saved start line past everything read this pass
        ch4_config.pa_startline = ch4_config.pa_startline + len(
            pa_file_contents) - 1

        pa_file_contents[:] = [line for line in pa_file_contents if line]

        pa_lines = []
        for line in pa_file_contents:
            pa_lines.append(read_pa_line(line))

        if not pa_lines:
            logger.info('No new PaLines found.')
            # close connections on the early exit (previously leaked)
            session.close()
            engine.dispose()
            core_session.close()
            core_engine.dispose()
            return False
        else:
            ct = 0  # count committed logs
            all_line_dates = [line.date for line in pa_lines]

            # SQLite can't take in clauses with > 1000 variables, so chunk to sets of 500
            if len(all_line_dates) > 500:
                date_chunks = split_into_sets_of_n(all_line_dates, 500)
            else:
                date_chunks = [all_line_dates]
                # TODO: Can be reduced to just splitting, this step is done automatically by split_into.

            # renamed loop variable: the original shadowed the builtin `set`
            dates_already_in_db = []
            for date_chunk in date_chunks:
                chunk_matches = session.query(PaLine.date).filter(
                    PaLine.date.in_(date_chunk)).all()
                chunk_matches[:] = [s.date for s in chunk_matches]
                dates_already_in_db.extend(chunk_matches)

            for line in pa_lines:
                if line.date not in dates_already_in_db:
                    session.add(line)
                    logger.info(f'PaLine for {line.date} added.')
                    ct += 1

            if ct == 0:
                logger.info('No new PaLines found.')
            else:
                logger.info(f'{ct} PaLines added.')

        session.commit()
        core_session.merge(ch4_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True
    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False
async def plot_new_data(logger):
    """
    Checks data against the last plotting time, and creates new plots for CO, CO2, and CH4 if new data exists.

    Reads ambient Picarro data from the picarro database, filters it to the
    current plotting window, and registers the resulting plot files with the
    core database so they are staged for upload.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """
    logger.info('Running plot_new_data()')

    try:
        from pathlib import Path
        from summit_core import picarro_dir as rundir
        from summit_core import create_daily_ticks, connect_to_db, TempDir, Plot, core_dir, Config, add_or_ignore_plot
        from summit_picarro import Base, Datum, summit_picarro_plot

        plotdir = rundir / 'plots'
        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'
    except Exception as e:
        logger.error('ImportError occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        # the core database tracks plot staging and per-processor config
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        picarro_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not picarro_config:
            picarro_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(picarro_config)
            core_session.commit()
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in plot_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        # newest ambient point (mpv_position == 1 presumably marks ambient
        # sampling — TODO confirm); NOTE(review): .first()[0] will raise
        # TypeError if the table is empty (caught by the handler below)
        newest_data_point = (session.query(Datum.date).filter(
            Datum.mpv_position == 1).order_by(Datum.date.desc()).first()[0])

        if newest_data_point <= picarro_config.last_data_date:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            session.close()
            engine.dispose()
            return False

        picarro_config.last_data_date = newest_data_point
        core_session.add(picarro_config)

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            picarro_config.days_to_plot)

        # instrument_status == 963 / alarm_status == 0 filter out flagged data
        # — TODO confirm these status codes against the Picarro docs
        all_data = (
            session.query(Datum.date, Datum.co, Datum.co2,
                          Datum.ch4).filter((
                              Datum.mpv_position == 0)
                              | (Datum.mpv_position == 1)).filter(
                                  (Datum.instrument_status == 963),
                                  (Datum.alarm_status == 0)).filter(
                                      Datum.date >= date_limits['left']
                                  )  # grab only data that falls in plotting period
            .all())

        if not all_data:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            session.close()
            engine.dispose()
            return False

        # get only ambient data
        # unzip query rows into parallel per-species lists
        dates = []
        co = []
        co2 = []
        ch4 = []
        for result in all_data:
            dates.append(result.date)
            co.append(result.co)
            co2.append(result.co2)
            ch4.append(result.ch4)

        # plots are written into plotdir; TempDir restores the cwd afterwards
        with TempDir(plotdir):
            from summit_core import five_minute_medians

            # CO is noisy, so plot five-minute medians instead of raw points
            dates_co, co = five_minute_medians(dates, co)

            name = summit_picarro_plot(None, ({
                'Summit CO': [dates_co, co]
            }),
                                       limits={
                                           'right': date_limits.get('right', None),
                                           'left': date_limits.get('left', None),
                                           'bottom': 60,
                                           'top': 180
                                       },
                                       major_ticks=major_ticks,
                                       minor_ticks=minor_ticks)

            co_plot = Plot(plotdir / name, remotedir,
                           True)  # stage plots to be uploaded
            add_or_ignore_plot(co_plot, core_session)

            name = summit_picarro_plot(None, ({
                'Summit CO2': [dates, co2]
            }),
                                       limits={
                                           'right': date_limits.get('right', None),
                                           'left': date_limits.get('left', None),
                                           'bottom': 400,
                                           'top': 420
                                       },
                                       major_ticks=major_ticks,
                                       minor_ticks=minor_ticks,
                                       unit_string='ppmv')

            co2_plot = Plot(plotdir / name, remotedir,
                            True)  # stage plots to be uploaded
            add_or_ignore_plot(co2_plot, core_session)

            name = summit_picarro_plot(None, ({
                'Summit Methane [Picarro]': [dates, ch4]
            }),
                                       limits={
                                           'right': date_limits.get('right', None),
                                           'left': date_limits.get('left', None),
                                           'bottom': 1850,
                                           'top': 2050
                                       },
                                       major_ticks=major_ticks,
                                       minor_ticks=minor_ticks)

            ch4_plot = Plot(plotdir / name, remotedir,
                            True)  # stage plots to be uploaded
            add_or_ignore_plot(ch4_plot, core_session)

        logger.info('New data plots were created.')

        session.close()
        engine.dispose()
        # commit staged plots and the updated last_data_date together
        core_session.commit()
        core_session.close()
        core_engine.dispose()
        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return False
async def plot_new_data(logger):
    """
    If newer data exists, plot it going back one week from the day of the plotting.

    Queries quantified GcRuns (median present, standard/run RSDs under 2%) and,
    when the newest run postdates the last plotted data, writes a methane plot
    and stages it in the core database for upload.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    try:
        from pathlib import Path
        from summit_core import core_dir, Config
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, create_daily_ticks, TempDir, Plot, add_or_ignore_plot
        from summit_methane import Sample, GcRun, Base, plottable_sample, summit_methane_plot

        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'
    except ImportError as e:
        logger.error('ImportError occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in plot_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        ch4_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not ch4_config:
            ch4_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(ch4_config)
            core_session.commit()
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in plot_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running plot_new_data()')

        # NOTE: a redundant second connect_to_db() call was removed here; it
        # silently leaked the engine/session opened above.

        # only plot runs with a median and acceptably low standard/run RSDs
        runs_with_medians = (session.query(GcRun).filter(
            GcRun.median != None).filter(GcRun.standard_rsd < .02).filter(
                GcRun.rsd < .02).order_by(GcRun.date).all())

        if not runs_with_medians:
            # guard added: an empty result previously raised IndexError below
            logger.info('No quantified GcRuns found to plot.')
            session.close()
            engine.dispose()
            core_session.close()
            core_engine.dispose()
            return False

        last_ambient_date = runs_with_medians[-1].date
        # get date after filtering, ie don't plot if there's no new data getting plotted

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            ch4_config.days_to_plot)

        if last_ambient_date > ch4_config.last_data_date:
            ambient_dates = [run.date for run in runs_with_medians]
            ambient_mrs = [run.median for run in runs_with_medians]

            with TempDir(rundir / 'plots'):
                name = summit_methane_plot(
                    None,
                    {'Summit Methane [GC]': [ambient_dates, ambient_mrs]},
                    limits={
                        'bottom': 1850,
                        'top': 2050,
                        'right': date_limits.get('right', None),
                        'left': date_limits.get('left', None)
                    },
                    major_ticks=major_ticks,
                    minor_ticks=minor_ticks)

                methane_plot = Plot(rundir / 'plots' / name, remotedir,
                                    True)  # stage plots to be uploaded
                add_or_ignore_plot(methane_plot, core_session)

                ch4_config.last_data_date = last_ambient_date
                core_session.merge(ch4_config)

            logger.info('New data plots created.')
        else:
            logger.info('No new data found to be plotted.')

        session.close()
        engine.dispose()
        core_session.commit()
        core_session.close()
        core_engine.dispose()
        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        core_session.close()
        core_engine.dispose()
        session.close()
        engine.dispose()
        return False
async def update_excel_sheet(logger):
    """
    This checks for new GcRuns since it was last ran and creates a DataFrame containing run
    information that's appended to a spreadsheet on the Z-drive. This sheet is filled out by
    whoever does the manual integration, and is later read by TODO - I haven't written that yet
    to bring the updated peak areas back into the database and re-calculate mixing ratios.

    :param logger: logging logger for info and failures
    :return: bool, True if ran, False if errored
    """
    logger.info('Running update_excel_sheet()')

    try:
        import pandas as pd
        from datetime import datetime

        from summit_core import methane_dir as rundir
        from summit_errors import send_processor_warning

        from summit_methane import GcRun, Base, add_formulas_and_format_sheet
        from summit_core import Config, connect_to_db, append_df_to_excel
        from summit_core import methane_dir, core_dir, data_file_paths

        # sheet location comes from file_locations.json via data_file_paths
        methane_sheet = data_file_paths.get('methane_sheet', None)

        if not methane_sheet:
            logger.error(
                'Filepath for the methane integration sheet could not be retrieved.'
            )
            send_processor_warning(
                PROC, 'Filepath Error',
                '''The methane integration sheet filepath could not be retrieved.
It should be listed as "methane_sheet" in file_locations.json in the core folder.'''
            )
            return False
    except ImportError as e:
        logger.error('ImportError occurred in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in update_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        # sheet progress is tracked under its own 'methane_sheet' processor
        # entry in the core Config table, separate from PROC
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        methane_sheet_config = core_session.query(Config).filter(
            Config.processor == 'methane_sheet').one_or_none()

        if not methane_sheet_config:
            methane_sheet_config = Config(processor='methane_sheet')
            # use all default values except processor on init
            core_session.add(methane_sheet_config)
            core_session.commit()
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in update_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        # newest run date, saved at the end so the next pass starts after it
        most_recent_gcrun = session.query(GcRun.date).order_by(
            GcRun.date.desc()).first()

        if not most_recent_gcrun:
            most_recent_gcrun = datetime(
                1900, 1, 1)  # default to a safely historic date
        else:
            most_recent_gcrun = most_recent_gcrun.date  # get date from tuple response

        # object list of all the runs past the most recent date
        new_runs = session.query(GcRun).filter(
            GcRun.date > methane_sheet_config.last_data_date).all()

        if new_runs:
            col_list = [
                'date', 'filename', 'peak1', 'peak2', 'mr1', 'mr2',
                'run_median', 'run_rsd', 'std_median', 'std_rsd'
            ]  # list of all columns needed in the dataframe

            master_df = pd.DataFrame(
                index=None, columns=col_list)  # frame an empty df for new run data

            for run in new_runs:
                # each run becomes a five-row block in the sheet, with date
                # and filename only on the first row of the block
                df = pd.DataFrame(
                    index=range(1, 6),
                    columns=col_list)  # create a five-row block to add later
                df['date'][1] = run.date
                df['filename'][
                    1] = run.logfile.name  # add date and filename for this block

                # The below can copy peak information from the automatic integrations into the spreadsheet
                # peaks1 = [sample.peak for sample in run.samples if sample.sample_num in [0,2,4,6,8]]
                # peaks2 = [sample.peak for sample in run.samples if sample.sample_num in [1,3,5,7,9]]
                # df.loc[0:5, 'peak1'] = [(peak.pa if peak else None) for peak in peaks1]
                # df.loc[0:5, 'peak2'] = [(peak.pa if peak else None) for peak in peaks2]

                master_df = master_df.append(
                    df)  # append block to all new ones so far

            # TODO: Anything touching sheets need to be carefully made to catch inacessible files ######################
            append_df_to_excel(methane_sheet, master_df,
                               **{'index': False})  # add all new lines and save sheet
            add_formulas_and_format_sheet(
                methane_sheet
            )  # open sheet and add formulas where non-existent, format cols

            logger.info(
                'New GcRuns added to the automated integration spreadsheet.')

            methane_sheet_config.last_data_date = most_recent_gcrun
        else:
            logger.info(
                'No new GcRuns found to add to the automated integration spreadsheet.'
            )

        core_session.merge(methane_sheet_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True
    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False
async def plot_dailies(logger):
    """
    Loads dailies for the last 3 weeks and plots with ticks for every three days
    and minor ticks for every day. Plots are registered with the core database
    so they're uploaded to the Taylor drive.

    Six plots are produced: hot-zone temperatures, room temperatures, gas
    pressures, inlet temperature, supply voltages, and MFC flows.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    try:
        from pathlib import Path
        import datetime as dt
        from summit_core import connect_to_db, core_dir, TempDir, Config, Plot, add_or_ignore_plot, create_daily_ticks

        plotdir = core_dir / 'plots/daily'
        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/protected/plots'

        # create the plot directory on first run
        try:
            os.chdir(plotdir)
        except FileNotFoundError:
            os.mkdir(plotdir)
    except ImportError as e:
        logger.error('ImportError occurred in plot_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_daily.sqlite', core_dir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the database in plot_dailies()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        daily_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not daily_config:
            daily_config = Config(
                processor=PROC, days_to_plot=21
            )  # use all default values except processor on init
            core_session.add(daily_config)
            core_session.commit()
    except Exception as e:
        # fixed copy-paste error: message previously referenced plot_new_data()
        logger.error(
            f'Error {e.args} prevented connecting to the core database in plot_dailies()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running plot_dailies()')

        date_ago = datetime.now() - dt.timedelta(
            days=daily_config.days_to_plot + 1
        )  # set a static for retrieving data at beginning of plot cycle

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            daily_config.days_to_plot, minors_per_day=1)

        major_ticks = [t for ind, t in enumerate(major_ticks)
                       if ind % 3 == 0]  # use every third daily tick

        dailies = session.query(Daily).filter(Daily.date >= date_ago).order_by(
            Daily.date).all()

        # transpose Daily rows into one list per parameter for plotting
        dailydict = {}
        for param in daily_parameters:
            dailydict[param] = [getattr(d, param) for d in dailies]

        with TempDir(plotdir):
            # hot-zone temperatures
            name = summit_daily_plot(dailydict.get('date'), ({
                'Ads Xfer A': [None, dailydict.get('ads_xfer_a')],
                'Ads Xfer B': [None, dailydict.get('ads_xfer_b')],
                'Valves Temp': [None, dailydict.get('valves_temp')],
                'GC Xfer Temp': [None, dailydict.get('gc_xfer_temp')],
                'Catalyst': [None, dailydict.get('catalyst')]
            }),
                                     limits={
                                         'right': date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 475
                                     },
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            hot_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(hot_plot, core_session)

            # room/ambient temperatures
            name = summit_daily_plot(dailydict.get('date'), ({
                'CJ1 Temp': [None, dailydict.get('cj1')],
                'CJ2 Temp': [None, dailydict.get('cj2')],
                'Standard Temp': [None, dailydict.get('std_temp')]
            }),
                                     limits={
                                         'right': date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 10,
                                         'top': 50
                                     },
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            room_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(room_plot, core_session)

            # gas supply pressures
            name = summit_daily_plot(dailydict.get('date'), ({
                'H2 Gen Pressure': [None, dailydict.get('h2_gen_p')],
                'Line Pressure': [None, dailydict.get('line_p')],
                'Zero Pressure': [None, dailydict.get('zero_p')],
                'FID Pressure': [None, dailydict.get('fid_p')]
            }),
                                     limits={
                                         'right': date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 75
                                     },
                                     y_label_str='Pressure (PSI)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            pressure_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(pressure_plot, core_session)

            # inlet temperature
            name = summit_daily_plot(dailydict.get('date'), ({
                'Inlet Short Temp': [None, dailydict.get('inlet_short')]
            }),
                                     limits={
                                         'right': date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 60
                                     },
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            inlet_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(inlet_plot, core_session)

            # supply voltages
            name = summit_daily_plot(dailydict.get('date'), ({
                'Battery V': [None, dailydict.get('battv')],
                '12Va': [None, dailydict.get('v12a')],
                '15Va': [None, dailydict.get('v15a')],
                '15Vb': [None, dailydict.get('v15b')],
                '24V': [None, dailydict.get('v24')],
                '5Va': [None, dailydict.get('v5a')]
            }),
                                     limits={
                                         'right': date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 30
                                     },
                                     y_label_str='Voltage (v)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            voltage_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(voltage_plot, core_session)

            # mass-flow-controller flows
            name = summit_daily_plot(dailydict.get('date'), ({
                'MFC1': [None, dailydict.get('mfc1')],
                'MFC2': [None, dailydict.get('mfc2')],
                'MFC3a': [None, dailydict.get('mfc3a')],
                'MFC3b': [None, dailydict.get('mfc3b')],
                'MFC4': [None, dailydict.get('mfc4')],
                'MFC5': [None, dailydict.get('mfc5')]
            }),
                                     limits={
                                         'right': date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': -1,
                                         'top': 3.5
                                     },
                                     y_label_str='Flow (Ml/min)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            flow_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(flow_plot, core_session)

        core_session.commit()
        core_session.close()
        core_engine.dispose()
        session.close()
        engine.dispose()
        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_dailies()')
        send_processor_email(PROC, exception=e)
        session.close()
        engine.dispose()
        # also release core connections on failure (previously leaked)
        core_session.close()
        core_engine.dispose()
        return False
async def dual_plot_methane(logger):
    """
    Connects to both the methane [gc] and picarro databases to create an
    overlayed plot of both data.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """
    PROC = 'Methane DualPlotter'  # shadows the module-level PROC for this plotter

    try:
        from pathlib import Path
        from summit_core import core_dir, Config
        from summit_core import methane_dir
        from summit_core import picarro_dir
        from summit_core import connect_to_db, create_daily_ticks, TempDir, Plot, add_or_ignore_plot
        from summit_picarro import Datum
        from summit_methane import Base, GcRun, summit_methane_plot
        from summit_picarro import Base as PicarroBase

        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'
    except ImportError as e:
        logger.error('ImportError occurred in dual_plot_methane()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        gc_engine, gc_session = connect_to_db(
            'sqlite:///summit_methane.sqlite', methane_dir)
        Base.metadata.create_all(gc_engine)

        picarro_engine, picarro_session = connect_to_db(
            'sqlite:///summit_picarro.sqlite', picarro_dir)
        PicarroBase.metadata.create_all(picarro_engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in dual_plot_methane()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        twoplot_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not twoplot_config:
            twoplot_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(twoplot_config)
            core_session.commit()
    except Exception as e:
        # fixed copy-paste error: message previously referenced plot_new_data()
        logger.error(
            f'Error {e.args} prevented connecting to the core database in dual_plot_methane()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running dual_plot_methane()')

        newest_picarro_data_point = (picarro_session.query(Datum.date).filter(
            Datum.mpv_position == 1).order_by(Datum.date.desc()).first()[0])

        try:
            newest_gc_data_point = (gc_session.query(GcRun.date).filter(
                GcRun.median != None).filter(GcRun.standard_rsd < .02).filter(
                    GcRun.rsd < .02).order_by(GcRun.date.desc()).first()[0])
        except TypeError:
            # .first() returned None, so there are no quantified GcRuns yet
            logger.error(
                'NoneType not subscriptable encountered due to lack of methane data to query.'
            )
            from summit_errors import send_processor_warning

            send_processor_warning(
                PROC, 'Dual Plotter',
                '''The Methane Dual Plotter could not query any GcRuns for methane data.\n
Check the database to make sure there are in fact GcRuns with medians and valid rsds. \nThis often happens when the methane database is remade without re-setting the filesize and pa_startline in the config table of Core database, thus no peaks are found.''')
            # (also fixed 'pa_startlie' typo in the warning above)
            # close all connections on this early exit (previously leaked)
            gc_session.close()
            gc_engine.dispose()
            picarro_session.close()
            picarro_engine.dispose()
            core_session.close()
            core_engine.dispose()
            return False

        newest_data_point = max(newest_picarro_data_point, newest_gc_data_point)

        if newest_data_point <= twoplot_config.last_data_date:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            picarro_session.close()
            picarro_engine.dispose()
            # also release the gc connection on this early exit (previously leaked)
            gc_session.close()
            gc_engine.dispose()
            return False

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            twoplot_config.days_to_plot)

        if newest_data_point > twoplot_config.last_data_date:
            runs_with_medians = (gc_session.query(GcRun).filter(
                GcRun.median != None).filter(GcRun.standard_rsd < .02).filter(
                    GcRun.rsd < .02).order_by(GcRun.date).all())

            gc_dates = [run.date for run in runs_with_medians]
            gc_ch4 = [run.median for run in runs_with_medians]

            picarro_data = (picarro_session.query(
                Datum.date, Datum.ch4).filter((Datum.mpv_position == 0) | (
                    Datum.mpv_position == 1)).filter(
                        (Datum.instrument_status == 963),
                        (Datum.alarm_status == 0)).filter(
                            Datum.date >= date_limits['left']).all()
                            )  # grab only data that falls in plotting period

            picarro_dates = [p.date for p in picarro_data]
            picarro_ch4 = [p.ch4 for p in picarro_data]

            with TempDir(methane_dir / 'plots'):
                name = summit_methane_plot(
                    None, {
                        'Summit Methane [Picarro]': [picarro_dates, picarro_ch4],
                        'Summit Methane [GC]': [gc_dates, gc_ch4]
                    },
                    title='Summit Methane [Picarro & GC]',
                    limits={
                        'bottom': 1850,
                        'top': 2050,
                        'right': date_limits.get('right', None),
                        'left': date_limits.get('left', None)
                    },
                    major_ticks=major_ticks,
                    minor_ticks=minor_ticks)

                methane_plot = Plot(methane_dir / 'plots' / name, remotedir,
                                    True)  # stage plots to be uploaded
                add_or_ignore_plot(methane_plot, core_session)

                twoplot_config.last_data_date = newest_data_point
                core_session.merge(twoplot_config)

            logger.info('New data plots created.')
        else:
            logger.info('No new data found to be plotted.')

        gc_session.close()
        gc_engine.dispose()
        picarro_session.close()
        picarro_engine.dispose()

        core_session.commit()
        core_session.close()
        core_engine.dispose()
        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in dual_plot_methane()')
        send_processor_email(PROC, exception=e)
        core_session.close()
        core_engine.dispose()
        gc_session.close()
        gc_engine.dispose()
        picarro_session.close()
        picarro_engine.dispose()
        return False