Example 1
async def check_existing_errors(logger, active_errors=None):
    logger.info('Running check_existing_errors()')

    try:
        if not active_errors:
            logger.info(
                'Check_existing_errors() called without any active errors.')
            return False

        for ind, err in enumerate(active_errors):
            if err.reason == 'no new data':

                # is_resolved() handles logging in both cases
                if err.is_resolved(
                        processor=err.email_template.processor,
                        last_data_time=err.email_template.last_data_time,
                        logger=logger):
                    active_errors[ind] = None
            else:
                logger.info('Error aside from "no new data" was found.')

        active_errors = [err for err in active_errors if err]

        return active_errors

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_existing_errors()')
        send_processor_email(PROC, exception=e)
        return active_errors
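Example 1 (and check_for_new_data() in Example 5) relies on an Error object from summit_errors that exposes a reason, an email template, and an is_resolved() check. Its real implementation is not shown in these examples; the sketch below is only an assumption inferred from the call sites, and every name in it should be treated as hypothetical.

class Error:
    """Hypothetical sketch of the summit_errors Error object implied by the call sites above."""

    def __init__(self, reason, resolution_function, email_template):
        self.reason = reason                    # e.g. 'no new data'
        self.resolution_function = resolution_function
        self.email_template = email_template   # e.g. a NewDataEmail exposing .processor and .last_data_time

    def is_resolved(self, processor=None, last_data_time=None, logger=None):
        # delegate to the resolution function, which is expected to log the outcome either way
        return self.resolution_function(processor, last_data_time, logger)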
Example 2
async def check_send_plots(logger):
    """
    Look through all plots staged to be uploaded and remove them if successfully uploaded.
    :param logger: logging logger to log to
    :return: boolean, True if ran without errors
    """
    try:
        from summit_errors import send_processor_email
    except ImportError:
        logger.error('ImportError occurred in check_send_plots()')
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_core.sqlite',
                                        core_dir)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in check_send_plots()'
        )
        send_processor_email('Core', exception=e)
        return False

    try:
        plots_to_upload = session.query(Plot).filter(Plot.staged == True)

        remote_dirs = set([p.remote_path for p in plots_to_upload.all()])

        for remote_dir in remote_dirs:
            plot_set = plots_to_upload.filter(
                Plot.remote_path == remote_dir).all()

            if plot_set:
                paths_to_upload = [p.path for p in plot_set]
                successes = await send_files_sftp(paths_to_upload, remote_dir)

                for plot, success in zip(plot_set, successes):
                    if success:
                        logger.info(f'Plot {plot.name} uploaded to website.')
                        session.delete(plot)
                    else:
                        logger.warning(f'Plot {plot.name} failed to upload.')

        session.commit()

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_send_plots().')
        send_processor_email('Core', exception=e)
        session.close()
        engine.dispose()
        return False
Example 3
async def main():
    try:
        from summit_core import configure_logger
        from summit_core import error_dir as rundir
        logger = configure_logger(rundir, __name__)
    except Exception as e:
        print('Error logger could not be configured')
        send_processor_email(PROC, exception=e)
        return False

    errors = []

    while True:
        errors = await asyncio.create_task(
            check_for_new_data(logger, active_errors=errors))
        errors = await asyncio.create_task(
            check_existing_errors(logger, active_errors=errors))

        # sleep between passes so the loop does not hammer the database continuously
        for i in range(40):
            await asyncio.sleep(30)
Example 4
async def main():
    try:
        from summit_core import picarro_dir as rundir
        from summit_core import configure_logger
        logger = configure_logger(rundir, __name__)
    except Exception as e:
        print(f'Error {e.args} prevented logger configuration.')
        send_processor_email(PROC, exception=e)
        return

    try:
        if await asyncio.create_task(check_load_new_data(logger)):

            if await asyncio.create_task(find_cal_events(logger)):
                await asyncio.create_task(create_mastercals(logger))

            await asyncio.create_task(plot_new_data(logger))

        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in Picarro main()')
        send_processor_email(PROC, exception=e)
        return False
Example 5
async def check_for_new_data(logger, active_errors=None):
    reason = 'no new data'

    if not active_errors:
        active_errors = []

    try:

        logger.info('Running check_for_new_data()')

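        # each processor has its own staleness threshold: voc 8 h, methane 3 h, picarro 2 h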
        for proc, time_limit in zip(
            ['voc', 'methane', 'picarro'],
            [dt.timedelta(hours=hr) for hr in [8, 3, 2]]):

            last_data_time = get_last_processor_date(proc, logger)

            if not last_data_time:
                logger.warning(f'No data available to compare for {proc}.')
                continue

            if datetime.now() - last_data_time > time_limit:
                if matching_error(active_errors, reason, proc):
                    logger.error(
                        f'Error for {reason} for the {proc} processor is already active and was ignored.'
                    )
                    continue
                else:
                    active_errors.append(
                        Error(reason, new_data_found,
                              NewDataEmail(sender, proc, last_data_time)))

        return active_errors

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_for_new_data()')
        send_processor_email(PROC, exception=e)
        return False
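check_for_new_data() also depends on a matching_error() helper to decide whether an error for a given reason and processor is already being tracked. Its implementation is not shown here; the following is a minimal sketch assuming it simply scans the active errors, with the attribute names taken from how errors are built and inspected in Examples 1 and 5.

def matching_error(active_errors, reason, proc):
    """Hypothetical sketch: is an error for this reason and processor already active?"""
    for err in active_errors:
        if err.reason == reason and err.email_template.processor == proc:
            return True
    return False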
Example 6
async def match_runs_to_lines(logger):
    """
    Read new log files and create new GcRun and Sample objects if possible.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db
        from summit_methane import GcRun, PaLine, match_lines_to_runs, Base
    except ImportError as e:
        send_processor_email(PROC, exception=e)
        logger.error('ImportError occurred in match_runs_to_lines()')
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in match_runs_to_lines()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running match_runs_to_lines()')

        unmatched_lines = session.query(PaLine).filter(
            PaLine.run == None).all()
        unmatched_runs = session.query(GcRun).filter(
            GcRun.pa_line_id == None).all()

        # married_runs_count = session.query(GcRun).filter(GcRun.status == 'married').count()

        lines, runs, count = match_lines_to_runs(unmatched_lines,
                                                 unmatched_runs)

        session.commit()

        if count:
            logger.info(f'{count} GcRuns matched with PaLines.')
            return True
        else:
            logger.info('No new GcRun-PaLine pairs matched.')
            return False

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in match_runs_to_lines()')
        send_processor_email(PROC, exception=e)
        return False
Example 7
async def main():
    """
    Configure a logger and run processes in order, only proceeding if new data is created that warrants running the next
    processes.

    :return: Boolean, True if successful
    """

    try:
        from summit_core import methane_dir as rundir
        from summit_core import configure_logger
        logger = configure_logger(rundir, __name__)
    except Exception as e:
        print(f'Error {e.args} prevented logger configuration.')
        send_processor_email(PROC, exception=e)
        return

    try:
        new_pas = await asyncio.create_task(check_load_pa_log(logger))
        new_logs = await asyncio.create_task(check_load_run_logs(logger))

        if new_pas or new_logs:
            if await asyncio.create_task(match_runs_to_lines(logger)):
                if await asyncio.create_task(match_peaks_to_samples(logger)):
                    await asyncio.create_task(add_one_standard(logger))
                    if await asyncio.create_task(quantify_samples(logger)):
                        await asyncio.create_task(plot_new_data(logger))
                    await asyncio.create_task(update_excel_sheet(logger))

        return True

    except Exception as e:
        logger.critical(
            f'Exception {e.args} caused a complete failure of the CH4 processing.'
        )
        send_processor_email(PROC, exception=e)
        return False
Example 8
async def add_one_standard(logger):
    """
    Add a single standard (the current working one), so that quantifications are possible. VERY TEMPORARY.

    :param logger: logger, to log events to
    :return: Boolean, True if successful
    """

    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db
        from summit_methane import Standard, Base
    except ImportError as e:
        logger.error('ImportError occurred in add_one_standard()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in add_one_standard()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:

        current_standard_dates = [
            S.date_st for S in session.query(Standard).all()
        ]

        my_only_standard = Standard('ws_2019', 2067.16, datetime(2019, 1, 1),
                                    datetime(2019, 12, 31))

        if my_only_standard.date_st not in current_standard_dates:
            session.merge(my_only_standard)
            session.commit()

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in add_one_standard()')
        send_processor_email(PROC, exception=e)
        return False
Example 9
async def check_load_new_data(logger):
    """
    Checks for new files, checks length of old ones for updates, and processes/commits new data to the database.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """

    logger.info('Running check_load_new_data()')

    try:
        from summit_core import picarro_logs_path as data_path
        from summit_core import picarro_dir as rundir
        from summit_core import connect_to_db, get_all_data_files, check_filesize
        from summit_picarro import Base, DataFile, Datum
        from sqlalchemy.orm.exc import MultipleResultsFound
        from summit_errors import EmailTemplate, sender, processor_email_list

        from pandas.errors import ParserError, EmptyDataError
    except ImportError as e:
        logger.error('ImportError occurred in check_load_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} caused database connection to fail in check_load_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        db_files = session.query(DataFile)
        db_filenames = [d.name for d in db_files.all()]

        all_available_files = get_all_data_files(data_path, '.dat')

        files_to_process = session.query(DataFile).filter(
            DataFile.processed == False).all()

        for file in all_available_files:
            try:
                db_match = db_files.filter(
                    DataFile._name == file.name).one_or_none()
            except MultipleResultsFound:
                logger.warning(
                    f'Multiple results found for file {file.name}. The first was used.'
                )
                db_match = db_files.filter(DataFile._name == file.name).first()

            if file.name not in db_filenames:
                files_to_process.append(DataFile(file))
            elif check_filesize(file) > db_match.size:
                # if a matching file was found and it's now bigger, append for processing
                logger.info(
                    f'File {file.name} had more data and was added for processing.'
                )
                files_to_process.append(db_match)

        if not files_to_process:
            logger.warning('No new data was found.')
            return False

        for ind, file in enumerate(files_to_process):
            files_to_process[ind] = session.merge(
                file
            )  # merge files and return the merged object to overwrite the old
            logger.info(f'File {file.name} added for processing.')
        session.commit()

        for file in files_to_process:
            try:
                df = pd.read_csv(file.path, delim_whitespace=True)
            except EmptyDataError as e:
                logger.error(
                    f'Exception {e.args} occurred while reading {file.name}')
                send_processor_email(PROC, exception=e)
                continue
            except ParserError as e:
                logger.error(
                    f'Pandas ParserError occurred while reading {file.name}.')
                from summit_errors import send_processor_warning
                try:
                    df = pd.read_csv(file.path,
                                     delim_whitespace=True,
                                     error_bad_lines=False)
                    send_processor_warning(PROC, 'Dataframe', (
                        f'The Picarro Processor failed to read file {file.name}. '
                        + 'It was re-parsed, skipping unreadable lines, but should be'
                        + ' investigated.'))

                except Exception as e:
                    logger.error(
                        f'Exception {e.args} occurred in check_load_new_data() while reading a file.'
                        + f' The file was {file.name}')
                    send_processor_email(PROC, exception=e)
                    continue
            except Exception as e:
                logger.error(
                    f'Exception {e.args} occurred in check_load_new_data() while reading a file.'
                    + f' The file was {file.name}')
                send_processor_email(PROC, exception=e)
                continue

            original_length = len(df)

            df.dropna(axis=0, how='any', inplace=True)

            new_length = len(df)
            diff = original_length - new_length

            if diff:
                logger.warning(
                    f'Dataframe contained {diff} null values in {file.name}.')
                from summit_errors import send_processor_warning

                send_processor_warning(PROC, 'DataFrame', (
                    f'The Picarro Processor cut {diff} lines from a dataframe after reading it.\n'
                    +
                    f'{file.name} should be investigated and cleaned if necessary.'
                ))

            # CO2 stays in ppm
            df['CO_sync'] *= 1000  # convert CO to ppb
            df['CH4_sync'] *= 1000  # convert CH4 to ppb
            df['CH4_dry_sync'] *= 1000

            df_list = df.to_dict('records')  # convert to list of dicts

            data_list = []
            for line in df_list:
                data_list.append(Datum(line))

            if data_list:
                data_dates = [d.date for d in data_list]
                dates_already_in_db = session.query(Datum.date).filter(
                    Datum.date.in_(data_dates)).all()
                dates_already_in_db[:] = [d.date for d in dates_already_in_db]

                for d in data_list:
                    if d.date not in dates_already_in_db:
                        d.file_id = file.id  # relate Datum to the file it originated in
                        session.add(d)
            else:
                logger.info(f'No new data created from file {file.name}.')

            file.processed = True
            file.size = check_filesize(file.path)
            logger.info(f'All data in file {file.name} processed.')
            session.commit()

        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_load_new_data().')
        send_processor_email(PROC, exception=e)
        return False
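The fallback re-parse in Example 9 uses pandas' error_bad_lines=False flag and delim_whitespace=True, both of which are deprecated in newer pandas releases (error_bad_lines was removed in pandas 2.0). The calls above are left as written because the project may pin an older pandas; on a current install the equivalent read would look roughly like this sketch, with file.path being the same DataFile path used in the loop:

            # modern equivalent of pd.read_csv(file.path, delim_whitespace=True, error_bad_lines=False)
            df = pd.read_csv(file.path, sep=r'\s+', on_bad_lines='skip')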
Example 10
async def create_mastercals(logger):
    """
    Searches all un-committed CalEvents, looking for (high, middle, low) sets that can then have a curve and
    other stats calculated. It will report them as DEBUG items in the log.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """

    logger.info('Running create_mastercals()')

    try:
        from summit_core import picarro_dir as rundir
        from summit_core import connect_to_db
        from summit_picarro import MasterCal, CalEvent, match_cals_by_min
        import matplotlib.pyplot as plt
        import seaborn as sns
        import numpy as np
    except Exception as e:
        logger.error('ImportError occurred in create_mastercals()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite',
                                        rundir)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to database in create_mastercals()'
        )
        send_processor_email(PROC, exception=e)
        return False
    try:
        # Get cals by standard, but only if they're not in another MasterCal already
        lowcals = (session.query(CalEvent).filter(
            CalEvent.mastercal_id == None,
            CalEvent.standard_used == 'low_std').all())

        highcals = (session.query(CalEvent).filter(
            CalEvent.mastercal_id == None,
            CalEvent.standard_used == 'high_std').all())

        midcals = (session.query(CalEvent).filter(
            CalEvent.mastercal_id == None,
            CalEvent.standard_used == 'mid_std').all())

        mastercals = []
        for lowcal in lowcals:
            matching_high = match_cals_by_min(lowcal, highcals, minutes=5)

            if matching_high:
                matching_mid = match_cals_by_min(matching_high,
                                                 midcals,
                                                 minutes=5)

                if matching_mid:
                    mastercals.append(
                        MasterCal([lowcal, matching_high, matching_mid]))

        if mastercals:
            for mc in mastercals:
                # calculate curve from low - high point, and check middle distance
                mc.create_curve()
                session.add(mc)
                logger.info(f'MasterCal for {mc.subcals[0].date} created.')

            session.commit()
            return True

        else:
            logger.info('No MasterCals were created.')
            return False

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in create_mastercals()')
        send_processor_email(PROC, exception=e)
        return False
Example 11
async def find_cal_events(logger):
    """
    Searches the existing data for unused calibration data and creates/commits CalEvents if possible.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """

    logger.info('Running find_cal_events()')
    try:
        from summit_core import connect_to_db
        from summit_core import picarro_dir as rundir
        from summit_picarro import Base, Datum, CalEvent, mpv_converter, find_cal_indices
        from summit_picarro import log_event_quantification, filter_postcal_data
    except Exception as e:
        logger.error('ImportError occurred in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        standard_data = {}
        for MPV in [2, 3, 4]:
            mpv_data = pd.DataFrame(
                session.query(
                    Datum.id,
                    Datum.date).filter(Datum.mpv_position == MPV).filter(
                        Datum.cal_id == None).all())
            # get only data for this switching valve position, and not already in any calibration event

            if not len(mpv_data):
                logger.info(
                    f'No new calibration events found for standard {mpv_converter[MPV]}'
                )
                continue

            mpv_data['date'] = pd.to_datetime(mpv_data['date'])
            # use mpv_converter dict to get standard information
            standard_data[mpv_converter[MPV]] = mpv_data.sort_values(
                by=['date']).reset_index(drop=True)

        for standard, data in standard_data.items():
            indices = find_cal_indices(data['date'])

            cal_events = []

            if not len(indices) and len(data):
                # no event boundaries were found, but there is still calibration data: create a single event
                event_data = session.query(Datum).filter(
                    Datum.id.in_(data['id'])).all()
                cal_events.append(CalEvent(event_data, standard))

            elif not len(indices):
                # no event boundaries were found at all
                logger.info(
                    f'No new cal events were found for {standard} standard.')
                continue

            prev_ind = 0

            for num, ind in enumerate(
                    indices):  # get all data within this event
                event_data = session.query(Datum).filter(
                    Datum.id.in_(data['id'].iloc[prev_ind:ind])).all()
                cal_events.append(CalEvent(event_data, standard))

                if num == (
                        len(indices) - 1
                ):  # if it's the last index, get all ahead of it as the last event
                    event_data = session.query(Datum).filter(
                        Datum.id.in_(data['id'].iloc[ind:])).all()
                    cal_events.append(CalEvent(event_data, standard))

                prev_ind = ind

            for ev in cal_events:

                filter_postcal_data(
                    ev, session
                )  # flag the following minute as questionable data (inst_status = 999)

                if ev.date - ev.dates[0] < dt.timedelta(seconds=90):
                    logger.info(
                        f'CalEvent for date {ev.date} had a duration < 90s and was ignored.'
                    )
                    ev.standard_used = 'dump'  # give not-long-enough events standard type 'dump' so they're ignored
                    session.merge(ev)
                else:
                    for cpd in ['co', 'co2', 'ch4']:
                        ev.calc_result(
                            cpd, 21
                        )  # calculate results for all compounds going 21s back

                    session.merge(ev)
                    logger.info(f'CalEvent for date {ev.date} added.')
                    log_event_quantification(
                        logger, ev)  # show quantification info as DEBUG in log
            session.commit()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False
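find_cal_events() leans on find_cal_indices() to split a sorted series of timestamps into separate calibration events. That helper is not reproduced in these examples; the sketch below assumes an event boundary is any gap between consecutive points larger than a fixed threshold, and the ten-minute value is purely illustrative.

import datetime as dt
import pandas as pd

def find_cal_indices(dates, gap=dt.timedelta(minutes=10)):
    """Hypothetical sketch: return the integer positions where a new calibration event begins."""
    dates = pd.Series(pd.to_datetime(dates)).reset_index(drop=True)
    diffs = dates.diff()                    # time between consecutive points; NaT for the first row
    return list(dates.index[diffs > gap])   # a large gap marks the start of the next event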
Example 12
async def move_log_files(logger):
    """
    Runs continuously and sleeps for 10 minutes at a time. Comb the directories for new data files and move any that
    are new or have been updated. This WILL NOT handle turning over a new year in the daily files well, as they have no
    year in the filename. I can't fix that.

    :param logger: logging logger to log to
    :return: boolean, True if ran without errors
    """

    while True:
        try:
            from summit_errors import send_processor_email, EmailTemplate, sender, processor_email_list
            from shutil import copy
            import datetime as dt
            import os
        except ImportError:
            logger.error('ImportError occurred in move_log_files()')
            return False

        try:
            engine, session = connect_to_db('sqlite:///summit_core.sqlite',
                                            core_dir)
            MovedFile.__table__.create(engine, checkfirst=True)
        except Exception as e:
            logger.error(
                f'Exception {e.args} prevented connection to the database in move_log_files()'
            )
            send_processor_email('Core', exception=e)
            return False

        try:
            logger.info('Running move_log_files()')

            sync_paths = [
                methane_logs_sync, voc_logs_sync, daily_logs_sync,
                picarro_logs_sync
            ]
            data_paths = [
                methane_logs_path, voc_logs_path, daily_logs_path,
                picarro_logs_path
            ]
            data_types = ['methane', 'voc', 'daily', 'picarro']
            file_types = ['.txt', '.txt', '.txt', '.dat']

            for sync_path, type_, data_path, file_type in zip(
                    sync_paths, data_types, data_paths, file_types):

                # change the name of the daily files before reading them in (implemented: 2/14/2020)
                for d in get_all_data_files(daily_logs_sync, '.txt'):
                    if check_path_date(d).year == dt.datetime.now(
                    ).year and "2020" not in str(d):
                        name, extension = os.path.splitext(d)
                        d.rename(name + '_' + str(dt.datetime.now().year) +
                                 extension)

                sync_files = [
                    MovedFile(path, type_, 'sync', check_filesize(path))
                    for path in get_all_data_files(sync_path, file_type)
                ]

                data_files = (session.query(MovedFile).filter(
                    MovedFile.location == 'data').filter(
                        MovedFile.type == type_).all())
                moved_data_files = [d.name for d in data_files]

                for file in sync_files:
                    if file.name not in moved_data_files:
                        try:
                            copy(file.path, data_path)  # will overwrite
                        except PermissionError:
                            logger.error(
                                f'File {file.name} could not be moved due to a permissions error.'
                            )
                            from summit_errors import send_processor_warning
                            send_processor_warning(
                                PROC, 'PermissionError',
                                f'File {file.name} could not be moved due to a permissions error.\n'
                                +
                                'Copying/pasting the file, deleting the old one, and renaming '
                                +
                                'the file to its old name should allow it to be processed.\n'
                                + 'This will require admin privileges.')
                            continue
                        file.path = data_path / file.name
                        file.location = 'data'
                        session.merge(file)
                        logger.info(
                            f'File {file.name} moved to data directory.')
                    else:
                        matched_file = search_for_attr_value(
                            data_files, 'name', file.name)
                        if file.size > matched_file.size:
                            try:
                                copy(file.path, data_path)  # will overwrite
                            except PermissionError:
                                logger.error(
                                    f'File {file.name} could not be moved due to a permissions error.'
                                )
                                from summit_errors import send_processor_warning

                                send_processor_warning(
                                    PROC, 'PermissionError',
                                    f'File {file.name} could not be moved due to a permissions error.\n'
                                    +
                                    'Copying/pasting the file, deleting the old one, and renaming '
                                    +
                                    'the file to its old name should allow it to be processed.\n'
                                    + 'This will require admin privileges.')
                                continue

                            matched_file.size = check_filesize(
                                matched_file.path)
                            session.merge(matched_file)
                            logger.info(
                                f'File {matched_file.name} updated in data directory.'
                            )

            session.commit()

            session.close()
            engine.dispose()

            import gc
            gc.collect()

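            # sleep for ~10 minutes in 30-second chunks so other tasks on the event loop keep running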
            for i in range(20):
                await asyncio.sleep(30)

        except Exception as e:
            logger.error(f'Exception {e.args} occurred in move_log_files().')
            send_processor_email('Core', exception=e)
            session.close()
            engine.dispose()
            return False
Example 13
        errors = await asyncio.create_task(
            check_for_new_data(logger, active_errors=errors))

        if errors:
            errors = await asyncio.create_task(
                check_existing_errors(logger, active_errors=errors))

        print('Sleeping...')
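        # sleep for ~20 minutes in 30-second chunks between error checks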
        for i in range(40):
            await asyncio.sleep(30)


if __name__ == '__main__':

    try:
        from summit_core import methane_dir as rundir
        from summit_core import configure_logger

        logger = configure_logger(rundir, __name__)

    except Exception as e:
        print(f'Error {e.args} prevented logger configuration.')
        send_processor_email('MAIN', exception=e)
        raise e

    loop = asyncio.get_event_loop()
    loop.create_task(move_log_files(logger))
    loop.create_task(main(logger))

    loop.run_forever()
Example 14
async def check_load_run_logs(logger):
    """
    Read new log files and create new GcRun and Sample objects if possible.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from summit_core import methane_logs_path
        from summit_core import methane_dir as rundir
        from summit_core import get_all_data_files, connect_to_db
        from summit_methane import Base, GcRun, Sample, read_log_file
    except ImportError as e:
        logger.error('ImportError occurred in check_load_run_logs()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in check_load_run_logs()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running check_load_run_logs()')
        runs_in_db = session.query(GcRun).all()
        samples = session.query(Sample)
        sample_count = samples.count()

        run_dates = [r.date for r in runs_in_db]

        files = get_all_data_files(methane_logs_path, '.txt')

        runs = []
        for file in files:
            runs.append(read_log_file(file))

        new_run_count = 0  # count runs added
        for run in runs:
            if run.date not in run_dates:
                session.add(run)
                logger.info(f'GcRun for {run.date} added.')
                new_run_count += 1

        if not new_run_count:
            logger.info('No new GcRuns added.')
        else:
            session.commit()
            new_sample_count = session.query(Sample).count() - sample_count
            logger.info(
                f'{new_run_count} GcRuns added, containing {new_sample_count} Samples.'
            )

            if new_run_count * 10 != new_sample_count:
                logger.warning(
                    'There were not ten Samples per GcRun as expected.')

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()

        logger.error(f'Exception {e.args} occurred in check_load_run_logs()')
        send_processor_email(PROC, exception=e)
        return False
Example 15
async def read_excel_sheet(logger):
    logger.info('Running read_excel_sheet()')

    try:
        import pandas as pd
        from datetime import datetime

        from summit_core import methane_dir as rundir
        from summit_errors import send_processor_warning

        from summit_methane import GcRun, Base, add_formulas_and_format_sheet
        from summit_core import Config, connect_to_db, append_df_to_excel
        from summit_core import methane_dir, core_dir, data_file_paths

        methane_sheet = data_file_paths.get('methane_sheet', None)

        if not methane_sheet:
            logger.error(
                'Filepath for the methane integration sheet could not be retrieved.'
            )
            send_processor_warning(
                PROC, 'Filepath Error',
                '''The methane integration sheet filepath could not be retrieved. It should be listed
                                   as "methane_sheet" in file_locations.json in the core folder.'''
            )
            return False

    except ImportError as e:
        logger.error('ImportError occurred in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in read_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        methane_sheet_read_config = (core_session.query(Config).filter(
            Config.processor == 'methane_sheet_read').one_or_none())

        if not methane_sheet_read_config:
            methane_sheet_read_config = Config(processor='methane_sheet_read')
            # use all default values except processor on init
            core_session.add(methane_sheet_read_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in read_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:

        core_session.merge(methane_sheet_read_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False
Example 16
async def check_load_dailies(logger):
    """
    Check the daily log files for new or updated files and commit any new daily entries to the database.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from summit_core import connect_to_db, get_all_data_files, core_dir, daily_logs_path, search_for_attr_value
    except ImportError as e:
        logger.error('ImportError occurred in check_load_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_daily.sqlite',
                                        core_dir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the database in check_load_dailies()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running check_load_dailies()')

        daily_files_in_db = session.query(DailyFile).all()

        daily_files = [
            DailyFile(path)
            for path in get_all_data_files(daily_logs_path, '.txt')
        ]

        new_files = []

        for file in daily_files:
            file_in_db = search_for_attr_value(daily_files_in_db, 'path',
                                               file.path)

            if not file_in_db:
                new_files.append(file)
                logger.info(f'File {file.name} added for processing.')
            else:
                if file.size > file_in_db.size:
                    logger.info(
                        f'File {file_in_db.name} added to process additional data.'
                    )
                    new_files.append(file_in_db)

        if new_files:
            for file in new_files:
                dailies = read_daily_file(file.path)
                file_daily_dates = [d.date for d in file.entries]
                file.entries.extend(
                    [d for d in dailies if d.date not in file_daily_dates])
                file.size = file.path.stat().st_size
                session.merge(file)

            session.commit()

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_load_dailies()')
        send_processor_email(PROC, exception=e)
        session.close()
        engine.dispose()
        return False
Example 17
async def quantify_samples(logger):
    """
    On a per-run basis, use std1 (sample position 2) to quantify ambient samples 0-4 and std2 (sample position 7) to
    quantify ambient samples 5-9. Output warnings if only one standard in a run is valid.

    :param logger: logger, to log events to
    :return: Boolean, True if successful
    """

    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, search_for_attr_value
        from summit_methane import Standard, GcRun, Base
        from summit_methane import calc_ch4_mr, valid_sample
    except Exception as e:
        logger.error('ImportError occurred in quantify_samples()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in quantify_samples()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running quantify_samples()')

        unquantified_runs = session.query(GcRun).filter(
            GcRun.median == None).all()

        ct = 0
        for run in unquantified_runs:

            # TODO: Move the majority of this to class methods for GcRuns; will make editing integrations WAY easier
            samples = run.samples

            standard = (
                session.query(Standard).filter(
                    run.date >= Standard.date_st,
                    run.date < Standard.date_en).first()
            )  # TODO; Set unique constraints on standards, revert to one_or_none()

            if standard is not None:
                ambients = [
                    sample for sample in samples
                    if (sample.sample_type == 3 and valid_sample(sample))
                ]
                standard1 = search_for_attr_value(samples, 'sample_num', 2)
                standard2 = search_for_attr_value(samples, 'sample_num', 7)

                if not ambients:
                    logger.warning(
                        f'No ambient samples were quantifiable in GcRun for {run.date}'
                    )
                    continue

                if (not valid_sample(standard1)) and (
                        not valid_sample(standard2)):
                    logger.warning(
                        f'No valid standard samples found in GcRun for {run.date}.'
                    )
                    continue

                elif not valid_sample(standard1):
                    # use std2 for all ambient quantifications
                    logger.info(
                        f'Only one standard used for samples in GcRun for {run.date}'
                    )
                    for amb in ambients:
                        amb = calc_ch4_mr(amb, standard2, standard)

                elif not valid_sample(standard2):
                    # use std1 for all ambient quantifications
                    logger.info(
                        f'Only one standard used for samples in GcRun for {run.date}'
                    )
                    for amb in ambients:
                        amb = calc_ch4_mr(amb, standard1, standard)

                else:
                    # use std1 for ambients 0-4 and std2 for ambients 5-9
                    for amb in ambients:
                        if amb.sample_num < 5:
                            amb = calc_ch4_mr(amb, standard1, standard)
                        else:
                            amb = calc_ch4_mr(amb, standard2, standard)

                    run.standard_rsd = (
                        s.stdev([standard1.peak.pa, standard2.peak.pa]) /
                        s.median([standard1.peak.pa, standard2.peak.pa]))

                from summit_methane import plottable_sample

                all_run_mrs = [
                    amb.peak.mr for amb in ambients if plottable_sample(amb)
                ]
                # do basic filtering for calculating run medians
                if all_run_mrs:
                    run.median = s.median(all_run_mrs)
                    if len(all_run_mrs) > 1:
                        run.rsd = s.stdev(all_run_mrs) / run.median

                session.merge(run)
                # merge only the run, it contains and cascades samples, palines and peaks that were changed
                ct += 1

            else:
                logger.warning(
                    f'No standard value found for GcRun at {run.date}.')

        session.commit()

        if ct:
            logger.info(f'{ct} GcRuns were successfully quantified.')
            session.close()
            engine.dispose()
            return True
        else:
            logger.info('No GcRuns quantified.')
            session.close()
            engine.dispose()
            return False

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in quantify_samples()')
        send_processor_email(PROC, exception=e)
        session.close()
        engine.dispose()
        return False
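quantify_samples() delegates the actual mixing-ratio calculation to calc_ch4_mr() in summit_methane, which is not reproduced here. Assuming it performs the usual single-point quantification (scaling the standard's certified value by the ratio of peak areas), its core would look roughly like the sketch below; the attribute name standard.mr and any additional corrections in the real helper are assumptions.

def calc_ch4_mr(sample, standard_sample, standard):
    """Hypothetical sketch: quantify one ambient sample against the run standard."""
    # certified standard value (ppb) scaled by the ratio of sample to standard peak areas
    sample.peak.mr = standard.mr * (sample.peak.pa / standard_sample.peak.pa)
    return sample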
Example 18
async def match_peaks_to_samples(logger):
    """
    All detected peaks in a run are attached to NmhcLines, but are not linked to Samples until they've passed certain
    criteria.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, split_into_sets_of_n
        from summit_methane import Peak, Sample, GcRun, Base, sample_rts
        from operator import attrgetter
        import datetime as dt
    except ImportError as e:
        logger.error('ImportError occurred in match_peaks_to_samples()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in match_peaks_to_samples()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running match_peaks_to_samples()')

        unmatched_samples = session.query(Sample).filter(
            Sample.peak_id == None, Sample.run_id != None).all()

        whole_set = list({s.run_id for s in unmatched_samples})
        # SQLite can't take in clauses with > 1000 variables, so chunk to sets of 500
        if len(whole_set) > 500:  # subdivide set
            sets = split_into_sets_of_n(whole_set, 500)
        else:
            sets = [
                whole_set
            ]  # TODO: Can be reduced to just splitting, this step is done automatically by split_into.

        runs_w_unmatched_samples = []
        for id_set in sets:
            runs_w_unmatched_samples.extend(
                (session.query(GcRun).filter(GcRun.id.in_(id_set)).all()
                 ))  # create set of runs that require processing

        for run in runs_w_unmatched_samples:
            # loop through runs containing samples that haven't been matched with peaks
            samples = session.query(Sample).filter(
                Sample.run_id == run.id).all()
            peaks = session.query(Peak).filter(
                Peak.pa_line_id == run.pa_line_id)

            for sample in samples:
                sn = sample.sample_num
                potential_peaks = peaks.filter(
                    Peak.rt.between(sample_rts[sn][0],
                                    sample_rts[sn][1])).all()
                # filter for peaks in this gc run between the expected retention times given in sample_rts

                if len(potential_peaks):
                    # currently, the criteria for "this is the real peak" is "this is the biggest peak"
                    peak = max(potential_peaks, key=attrgetter('pa'))
                    if peak:
                        sample.peak = peak
                        peak.name = 'CH4_' + str(sample.sample_num)
                        sample.date = run.pa_line.date + dt.timedelta(
                            minutes=peak.rt - 1)
                        session.merge(sample)

        session.commit()
        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in match_peaks_to_samples()')
        send_processor_email(PROC, exception=e)
        return False
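Both match_peaks_to_samples() (Example 18) and check_load_pa_log() (Example 19) chunk their IN() clauses to at most 500 values because SQLite limits the number of bound variables in a single statement. The split_into_sets_of_n() helper from summit_core is not shown in these examples; a minimal sketch of what it presumably does, as an assumption:

def split_into_sets_of_n(items, n):
    """Hypothetical sketch: break a list into successive chunks of at most n items."""
    return [items[i:i + n] for i in range(0, len(items), n)]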
Example 19
async def check_load_pa_log(logger):
    """
    Read the PA log and create new PaLine objects if possible.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    logger.info('Running check_load_pa_log()')

    try:
        from summit_core import methane_LOG_path as pa_filepath
        from summit_core import connect_to_db, check_filesize, core_dir, Config, split_into_sets_of_n
        from summit_methane import Base, read_pa_line, PaLine
        from summit_core import methane_dir as rundir
        from pathlib import Path
    except ImportError as e:
        logger.error('ImportError occurred in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in check_load_pa_log()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        ch4_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not ch4_config:
            ch4_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(ch4_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in check_load_pa_log()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        if check_filesize(pa_filepath) <= ch4_config.filesize:
            logger.info('PA file did not change size.')
            return False

        ch4_config.filesize = check_filesize(pa_filepath)
        core_session.merge(ch4_config)
        core_session.commit()

        line_to_start = ch4_config.pa_startline - 3  # pad start to avoid missing samples
        if line_to_start < 0:
            line_to_start = 0

        pa_file_contents = pa_filepath.read_text().split('\n')[line_to_start:]

        ch4_config.pa_startline = ch4_config.pa_startline + len(
            pa_file_contents) - 1

        pa_file_contents[:] = [line for line in pa_file_contents if line]

        pa_lines = []
        for line in pa_file_contents:
            pa_lines.append(read_pa_line(line))

        if not pa_lines:
            logger.info('No new PaLines found.')
            return False
        else:
            ct = 0  # count committed logs
            all_line_dates = [line.date for line in pa_lines]

            # SQLite can't take in clauses with > 1000 variables, so chunk to sets of 500
            if len(all_line_dates) > 500:
                sets = split_into_sets_of_n(all_line_dates, 500)
            else:
                sets = [all_line_dates]
                # TODO: Can be reduced to just splitting, this step is done automatically by split_into.

            dates_already_in_db = []
            for date_set in sets:
                set_matches = session.query(PaLine.date).filter(
                    PaLine.date.in_(date_set)).all()
                set_matches[:] = [s.date for s in set_matches]
                dates_already_in_db.extend(set_matches)

            for line in pa_lines:
                if line.date not in dates_already_in_db:
                    session.add(line)
                    logger.info(f'PaLine for {line.date} added.')
                    ct += 1

            if ct == 0:
                logger.info('No new PaLines found.')
            else:
                logger.info(f'{ct} PaLines added.')
                session.commit()

        core_session.merge(ch4_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False
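Several of these examples gate re-processing on check_filesize() from summit_core. Its body is not shown; given that Example 16 reads file.path.stat().st_size directly for the same purpose, it is presumably a thin wrapper along these lines (an assumption):

def check_filesize(path):
    """Hypothetical sketch: return the current size of the file at path, in bytes."""
    return path.stat().st_size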
Example 20
async def plot_new_data(logger):
    """
    Checks data against the last plotting time, and creates new plots for CO, CO2, and CH4 if new data exists.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """

    logger.info('Running plot_new_data()')

    try:
        from pathlib import Path
        from summit_core import picarro_dir as rundir
        from summit_core import create_daily_ticks, connect_to_db, TempDir, Plot, core_dir, Config, add_or_ignore_plot
        from summit_picarro import Base, Datum, summit_picarro_plot

        plotdir = rundir / 'plots'
        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'

    except Exception as e:
        logger.error('ImportError occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        picarro_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not picarro_config:
            picarro_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(picarro_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in plot_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        newest_data_point = (session.query(Datum.date).filter(
            Datum.mpv_position == 1).order_by(Datum.date.desc()).first()[0])

        if newest_data_point <= picarro_config.last_data_date:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            session.close()
            engine.dispose()
            return False

        picarro_config.last_data_date = newest_data_point
        core_session.add(picarro_config)

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            picarro_config.days_to_plot)

        all_data = (
            session.query(Datum.date, Datum.co, Datum.co2, Datum.ch4).filter((
                Datum.mpv_position == 0) | (Datum.mpv_position == 1)).filter(
                    (Datum.instrument_status == 963),
                    (Datum.alarm_status == 0)).filter(
                        Datum.date >= date_limits['left']
                    )  # grab only data that falls in plotting period
            .all())

        if not all_data:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            session.close()
            engine.dispose()
            return False

        # get only ambient data
        dates = []
        co = []
        co2 = []
        ch4 = []
        for result in all_data:
            dates.append(result.date)
            co.append(result.co)
            co2.append(result.co2)
            ch4.append(result.ch4)

        with TempDir(plotdir):

            from summit_core import five_minute_medians
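            # reduce CO to five-minute medians before plotting; CO2 and CH4 are plotted at full resolution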
            dates_co, co = five_minute_medians(dates, co)

            name = summit_picarro_plot(None, ({
                'Summit CO': [dates_co, co]
            }),
                                       limits={
                                           'right':
                                           date_limits.get('right', None),
                                           'left':
                                           date_limits.get('left', None),
                                           'bottom': 60,
                                           'top': 180
                                       },
                                       major_ticks=major_ticks,
                                       minor_ticks=minor_ticks)

            co_plot = Plot(plotdir / name, remotedir,
                           True)  # stage plots to be uploaded
            add_or_ignore_plot(co_plot, core_session)

            name = summit_picarro_plot(None, ({
                'Summit CO2': [dates, co2]
            }),
                                       limits={
                                           'right':
                                           date_limits.get('right', None),
                                           'left':
                                           date_limits.get('left', None),
                                           'bottom': 400,
                                           'top': 420
                                       },
                                       major_ticks=major_ticks,
                                       minor_ticks=minor_ticks,
                                       unit_string='ppmv')

            co2_plot = Plot(plotdir / name, remotedir,
                            True)  # stage plots to be uploaded
            add_or_ignore_plot(co2_plot, core_session)

            name = summit_picarro_plot(None, ({
                'Summit Methane [Picarro]': [dates, ch4]
            }),
                                       limits={
                                           'right':
                                           date_limits.get('right', None),
                                           'left':
                                           date_limits.get('left', None),
                                           'bottom': 1850,
                                           'top': 2050
                                       },
                                       major_ticks=major_ticks,
                                       minor_ticks=minor_ticks)

            ch4_plot = Plot(plotdir / name, remotedir,
                            True)  # stage plots to be uploaded
            add_or_ignore_plot(ch4_plot, core_session)

        logger.info('New data plots were created.')

        session.close()
        engine.dispose()

        core_session.commit()
        core_session.close()
        core_engine.dispose()
        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)

        session.close()
        engine.dispose()

        core_session.close()
        core_engine.dispose()
        return False
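The CO series above is reduced to one value per five minutes via summit_core's five_minute_medians before plotting, but that helper is not shown in these examples. Purely as a sketch of what such a helper could look like, assuming it takes parallel lists of datetimes and values and returns binned medians (the real summit_core implementation may differ):

import pandas as pd

def five_minute_medians_sketch(dates, values):
    """Hypothetical stand-in for summit_core.five_minute_medians: bin parallel
    date/value lists into one median value per five-minute interval."""
    series = pd.Series(values, index=pd.DatetimeIndex(dates)).sort_index()
    medians = series.resample('5min').median().dropna()  # median of each 5-minute bin
    return list(medians.index.to_pydatetime()), medians.tolist()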
Example n. 21
0
async def plot_new_data(logger):
    """
    If newer data exists, plot it going back one week from the day of plotting.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from pathlib import Path
        from summit_core import core_dir, Config
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, create_daily_ticks, TempDir, Plot, add_or_ignore_plot
        from summit_methane import Sample, GcRun, Base, plottable_sample, summit_methane_plot

        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'

    except ImportError as e:
        logger.error('ImportError occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in plot_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        ch4_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not ch4_config:
            ch4_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(ch4_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in plot_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running plot_new_data()')

        runs_with_medians = (session.query(GcRun).filter(
            GcRun.median != None).filter(GcRun.standard_rsd < .02).filter(
                GcRun.rsd < .02).order_by(GcRun.date).all())

        last_ambient_date = runs_with_medians[-1].date
        # date of the newest run that passed filtering, i.e. don't plot unless new data would actually appear

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            ch4_config.days_to_plot)

        if last_ambient_date > ch4_config.last_data_date:

            ambient_dates = [run.date for run in runs_with_medians]
            ambient_mrs = [run.median for run in runs_with_medians]

            with TempDir(rundir / 'plots'):
                name = summit_methane_plot(
                    None,
                    {'Summit Methane [GC]': [ambient_dates, ambient_mrs]},
                    limits={
                        'bottom': 1850,
                        'top': 2050,
                        'right': date_limits.get('right', None),
                        'left': date_limits.get('left', None)
                    },
                    major_ticks=major_ticks,
                    minor_ticks=minor_ticks)

                methane_plot = Plot(rundir / 'plots' / name, remotedir,
                                    True)  # stage plots to be uploaded
                add_or_ignore_plot(methane_plot, core_session)

                ch4_config.last_data_date = last_ambient_date
                core_session.merge(ch4_config)

            logger.info('New data plots created.')
        else:
            logger.info('No new data found to be plotted.')

        session.close()
        engine.dispose()

        core_session.commit()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        core_session.close()
        core_engine.dispose()
        session.close()
        engine.dispose()
        return False
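Both plotting routines rely on summit_core's create_daily_ticks to build the x-axis window and tick lists from days_to_plot, but its implementation is not included in these examples. A rough sketch of a helper with the same call signature and return shape, under the assumption that the window runs back from midnight tonight, could be:

from datetime import datetime, timedelta

def create_daily_ticks_sketch(days_to_plot, minors_per_day=4):
    """Hypothetical helper shaped like summit_core.create_daily_ticks: return a dict
    of left/right date limits plus daily major ticks and sub-daily minor ticks."""
    end = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
    start = end - timedelta(days=days_to_plot)
    date_limits = {'left': start, 'right': end}
    major_ticks = [start + timedelta(days=d) for d in range(days_to_plot + 1)]  # one tick per day
    minor_ticks = [start + timedelta(days=d / minors_per_day)
                   for d in range(days_to_plot * minors_per_day + 1)]
    return date_limits, major_ticks, minor_ticks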
Example n. 22
0
async def update_excel_sheet(logger):
    """
    This checks for new GcRuns since it was last run and creates a DataFrame containing run information that's appended
    to a spreadsheet on the Z-drive. This sheet is filled out by whoever does the manual integration, and is later read
    by TODO - I haven't written that yet
    to bring the updated peak areas back into the database and recalculate mixing ratios.

    :param logger: logging logger for info and failures
    :return: bool, True if ran, False if errored
    """
    logger.info('Running update_excel_sheet()')

    try:
        import pandas as pd
        from datetime import datetime

        from summit_core import methane_dir as rundir
        from summit_errors import send_processor_warning

        from summit_methane import GcRun, Base, add_formulas_and_format_sheet
        from summit_core import Config, connect_to_db, append_df_to_excel
        from summit_core import methane_dir, core_dir, data_file_paths

        methane_sheet = data_file_paths.get('methane_sheet', None)

        if not methane_sheet:
            logger.error(
                'Filepath for the methane integration sheet could not be retrieved.'
            )
            send_processor_warning(
                PROC, 'Filepath Error',
                '''The methane integration sheet filepath could not be retrieved. It should be listed
                                   as "methane_sheet" in file_locations.json in the core folder.'''
            )
            return False

    except ImportError as e:
        logger.error('ImportError occurred in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in update_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        methane_sheet_config = core_session.query(Config).filter(
            Config.processor == 'methane_sheet').one_or_none()

        if not methane_sheet_config:
            methane_sheet_config = Config(processor='methane_sheet')
            # use all default values except processor on init
            core_session.add(methane_sheet_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in update_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        most_recent_gcrun = session.query(GcRun.date).order_by(
            GcRun.date.desc()).first()

        if not most_recent_gcrun:
            most_recent_gcrun = datetime(
                1900, 1, 1)  # default to a safely historic date
        else:
            most_recent_gcrun = most_recent_gcrun.date  # get date from tuple response

        # list of GcRun objects newer than the last date written to the sheet
        new_runs = session.query(GcRun).filter(
            GcRun.date > methane_sheet_config.last_data_date).all()

        if new_runs:
            col_list = [
                'date', 'filename', 'peak1', 'peak2', 'mr1', 'mr2',
                'run_median', 'run_rsd', 'std_median', 'std_rsd'
            ]  # list of all columns needed in the dataframe

            master_df = pd.DataFrame(
                index=None,
                columns=col_list)  # frame an empty df for new run data

            for run in new_runs:
                df = pd.DataFrame(
                    index=range(1, 6),
                    columns=col_list)  # create a five-row block to add later
                df['date'][1] = run.date
                df['filename'][
                    1] = run.logfile.name  # add date and filename for this block

                # The below can copy peak information from the automatic integrations into the spreadsheet
                # peaks1 = [sample.peak for sample in run.samples if sample.sample_num in [0,2,4,6,8]]
                # peaks2 = [sample.peak for sample in run.samples if sample.sample_num in [1,3,5,7,9]]
                # df.loc[0:5, 'peak1'] = [(peak.pa if peak else None) for peak in peaks1]
                # df.loc[0:5, 'peak2'] = [(peak.pa if peak else None) for peak in peaks2]

                master_df = master_df.append(
                    df)  # append block to all new ones so far

            # TODO: Anything touching sheets needs to carefully catch inaccessible files ######################
            append_df_to_excel(methane_sheet, master_df,
                               **{'index':
                                  False})  # add all new lines and save sheet
            add_formulas_and_format_sheet(
                methane_sheet
            )  # open sheet and add formulas where non-existent, format cols

            logger.info(
                'New GcRuns added to the automated integration spreadsheet.')

            methane_sheet_config.last_data_date = most_recent_gcrun
        else:
            logger.info(
                'No new GcRuns found to add to the automated integration spreadsheet.'
            )

        core_session.merge(methane_sheet_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False
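update_excel_sheet() leans on summit_core's append_df_to_excel and summit_methane's add_formulas_and_format_sheet, neither of which appears in these examples. As a rough idea only, a simplified append helper could re-read the workbook and rewrite it with the new rows added; the real helper presumably appends in place and preserves existing formatting:

import pandas as pd
from pathlib import Path

def append_df_to_excel_sketch(filepath, df, **to_excel_kwargs):
    """Hypothetical, simplified stand-in for summit_core.append_df_to_excel:
    append df below any existing rows, then rewrite the workbook."""
    filepath = Path(filepath)
    if filepath.exists():
        existing = pd.read_excel(filepath)  # requires openpyxl for .xlsx files
        combined = pd.concat([existing, df], ignore_index=True)
    else:
        combined = df
    combined.to_excel(filepath, **to_excel_kwargs)  # e.g. index=False, as passed by update_excel_sheet()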
Example n. 23
0
async def plot_dailies(logger):
    """
    Loads dailies for the last 3 weeks and plots them with major ticks every three days and minor ticks every day.
    Plots are registered with the core database so they're uploaded to the Taylor drive.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from pathlib import Path
        import datetime as dt
        from summit_core import connect_to_db, core_dir, TempDir, Config, Plot, add_or_ignore_plot, create_daily_ticks
        plotdir = core_dir / 'plots/daily'
        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/protected/plots'

        if not plotdir.exists():
            plotdir.mkdir(parents=True)  # create the plot directory (and parents) if it doesn't exist yet

    except ImportError as e:
        logger.error('ImportError occurred in plot_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_daily.sqlite',
                                        core_dir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the database in plot_dailies()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        daily_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not daily_config:
            daily_config = Config(
                processor=PROC, days_to_plot=21
            )  # use all default values except processor on init
            core_session.add(daily_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in plot_dailies()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running plot_dailies()')

        # static cutoff date for retrieving data at the beginning of the plot cycle
        date_ago = datetime.now() - dt.timedelta(days=daily_config.days_to_plot + 1)

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            daily_config.days_to_plot, minors_per_day=1)

        major_ticks = [t for ind, t in enumerate(major_ticks)
                       if ind % 3 == 0]  # use every third daily tick

        dailies = session.query(Daily).filter(Daily.date >= date_ago).order_by(
            Daily.date).all()

        dailydict = {}
        for param in daily_parameters:
            dailydict[param] = [getattr(d, param) for d in dailies]

        with TempDir(plotdir):  # plot instrument temperatures, pressures, voltages, and flows

            name = summit_daily_plot(dailydict.get('date'), ({
                'Ads Xfer A': [None, dailydict.get('ads_xfer_a')],
                'Ads Xfer B': [None, dailydict.get('ads_xfer_b')],
                'Valves Temp': [None, dailydict.get('valves_temp')],
                'GC Xfer Temp': [None, dailydict.get('gc_xfer_temp')],
                'Catalyst': [None, dailydict.get('catalyst')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 475
                                     },
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            hot_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(hot_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'), ({
                'CJ1 Temp': [None, dailydict.get('cj1')],
                'CJ2 Temp': [None, dailydict.get('cj2')],
                'Standard Temp': [None, dailydict.get('std_temp')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 10,
                                         'top': 50
                                     },
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            room_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(room_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'), ({
                'H2 Gen Pressure': [None, dailydict.get('h2_gen_p')],
                'Line Pressure': [None, dailydict.get('line_p')],
                'Zero Pressure': [None, dailydict.get('zero_p')],
                'FID Pressure': [None, dailydict.get('fid_p')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 75
                                     },
                                     y_label_str='Pressure (PSI)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            pressure_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(pressure_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'), ({
                'Inlet Short Temp': [None, dailydict.get('inlet_short')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 60
                                     },
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            inlet_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(inlet_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'), ({
                'Battery V': [None, dailydict.get('battv')],
                '12Va': [None, dailydict.get('v12a')],
                '15Va': [None, dailydict.get('v15a')],
                '15Vb': [None, dailydict.get('v15b')],
                '24V': [None, dailydict.get('v24')],
                '5Va': [None, dailydict.get('v5a')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 30
                                     },
                                     y_label_str='Voltage (V)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            voltage_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(voltage_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'), ({
                'MFC1': [None, dailydict.get('mfc1')],
                'MFC2': [None, dailydict.get('mfc2')],
                'MFC3a': [None, dailydict.get('mfc3a')],
                'MFC3b': [None, dailydict.get('mfc3b')],
                'MFC4': [None, dailydict.get('mfc4')],
                'MFC5': [None, dailydict.get('mfc5')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': -1,
                                         'top': 3.5
                                     },
                                     y_label_str='Flow (mL/min)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            flow_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(flow_plot, core_session)

        core_session.commit()
        core_session.close()
        core_engine.dispose()

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_dailies()')
        send_processor_email(PROC, exception=e)
        session.close()
        engine.dispose()
        return False
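plot_dailies() assumes module-level imports of Daily, daily_parameters, and summit_daily_plot from the daily processor; those definitions are not part of this example. Purely as an illustration of the getattr-over-column-names pattern used above, a stripped-down model and parameter list might look like the following, with column names guessed from the plot labels and not taken from the real summit_daily module:

from sqlalchemy import Column, Integer, Float, DateTime
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class Daily(Base):
    """Hypothetical, stripped-down daily housekeeping record; the real model has many more columns."""
    __tablename__ = 'dailies'
    id = Column(Integer, primary_key=True)
    date = Column(DateTime)
    ads_xfer_a = Column(Float)
    ads_xfer_b = Column(Float)
    valves_temp = Column(Float)

# column names that plot_dailies() iterates over when building dailydict
daily_parameters = ['date', 'ads_xfer_a', 'ads_xfer_b', 'valves_temp']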
Example n. 24
0
async def dual_plot_methane(logger):
    """
    Connects to both the methane [GC] and Picarro databases to create an overlaid plot of both datasets.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    PROC = 'Methane DualPlotter'

    try:
        from pathlib import Path
        from summit_core import core_dir, Config
        from summit_core import methane_dir
        from summit_core import picarro_dir
        from summit_core import connect_to_db, create_daily_ticks, TempDir, Plot, add_or_ignore_plot
        from summit_picarro import Datum
        from summit_methane import Base, GcRun, summit_methane_plot

        from summit_picarro import Base as PicarroBase

        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'

    except ImportError as e:
        logger.error('ImportError occurred in dual_plot_methane()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        gc_engine, gc_session = connect_to_db(
            'sqlite:///summit_methane.sqlite', methane_dir)
        Base.metadata.create_all(gc_engine)

        picarro_engine, picarro_session = connect_to_db(
            'sqlite:///summit_picarro.sqlite', picarro_dir)
        PicarroBase.metadata.create_all(picarro_engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in dual_plot_methane()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        twoplot_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not twoplot_config:
            twoplot_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(twoplot_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in dual_plot_methane()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running dual_plot_methane()')

        newest_picarro_data_point = (picarro_session.query(Datum.date).filter(
            Datum.mpv_position == 1).order_by(Datum.date.desc()).first()[0])
        try:
            newest_gc_data_point = (gc_session.query(GcRun.date).filter(
                GcRun.median != None).filter(GcRun.standard_rsd < .02).filter(
                    GcRun.rsd < .02).order_by(GcRun.date.desc()).first()[0])
        except TypeError:
            logger.error(
                'NoneType not subscriptable encountered due to lack of methane data to query.'
            )
            from summit_errors import send_processor_warning
            send_processor_warning(
                PROC, 'Dual Plotter',
                '''The Methane Dual Plotter could not query any GcRuns for methane data.\n
                                   Check the database to make sure there are in fact GcRuns with medians and valid rsds.
                                   \nThis often happens when the methane database is remade without resetting
                                   the filesize and pa_startlie in the config table of the core database,
                                   thus no peaks are found.''')
            return False

        newest_data_point = max(newest_picarro_data_point,
                                newest_gc_data_point)

        if newest_data_point <= twoplot_config.last_data_date:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            picarro_session.close()
            picarro_engine.dispose()
            return False

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            twoplot_config.days_to_plot)

        if newest_data_point > twoplot_config.last_data_date:

            runs_with_medians = (gc_session.query(GcRun).filter(
                GcRun.median != None).filter(GcRun.standard_rsd < .02).filter(
                    GcRun.rsd < .02).order_by(GcRun.date).all())

            gc_dates = [run.date for run in runs_with_medians]
            gc_ch4 = [run.median for run in runs_with_medians]

            picarro_data = (picarro_session.query(
                Datum.date, Datum.ch4).filter((Datum.mpv_position == 0) | (
                    Datum.mpv_position == 1)).filter(
                        (Datum.instrument_status == 963),
                        (Datum.alarm_status == 0)).filter(
                            Datum.date >= date_limits['left']).all()
                            )  # grab only data that falls in plotting period

            picarro_dates = [p.date for p in picarro_data]
            picarro_ch4 = [p.ch4 for p in picarro_data]

            with TempDir(methane_dir / 'plots'):
                name = summit_methane_plot(
                    None, {
                        'Summit Methane [Picarro]':
                        [picarro_dates, picarro_ch4],
                        'Summit Methane [GC]': [gc_dates, gc_ch4]
                    },
                    title='Summit Methane [Picarro & GC]',
                    limits={
                        'bottom': 1850,
                        'top': 2050,
                        'right': date_limits.get('right', None),
                        'left': date_limits.get('left', None)
                    },
                    major_ticks=major_ticks,
                    minor_ticks=minor_ticks)

                methane_plot = Plot(methane_dir / 'plots' / name, remotedir,
                                    True)  # stage plots to be uploaded
                add_or_ignore_plot(methane_plot, core_session)

                twoplot_config.last_data_date = newest_data_point
                core_session.merge(twoplot_config)

            logger.info('New data plots created.')
        else:
            logger.info('No new data found to be plotted.')

        gc_session.close()
        gc_engine.dispose()

        picarro_session.close()
        picarro_engine.dispose()

        core_session.commit()

        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in dual_plot_methane()')
        send_processor_email(PROC, exception=e)

        core_session.close()
        core_engine.dispose()

        gc_session.close()
        gc_engine.dispose()

        picarro_session.close()
        picarro_engine.dispose()
        return False
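summit_methane_plot and summit_picarro_plot themselves are not part of these examples. For orientation only, a bare-bones overlay plotter with a similar interface (a dict of named [dates, values] series plus axis limits and tick lists, returning the saved file name) might be sketched as follows; the names, figure size, and units are assumptions, not the project's actual plotting code:

import matplotlib
matplotlib.use('Agg')  # render without a display, as a headless processor would
import matplotlib.pyplot as plt

def overlay_plot_sketch(series_dict, limits, major_ticks, minor_ticks, title, filename):
    """Hypothetical summit_methane_plot-style helper: overlay several date/value
    series on one axis, save a PNG, and return the file name."""
    fig, ax = plt.subplots(figsize=(11, 6))
    for label, (dates, values) in series_dict.items():
        ax.plot(dates, values, marker='.', linestyle='-', label=label)
    ax.set_xlim(limits.get('left'), limits.get('right'))
    ax.set_ylim(limits.get('bottom'), limits.get('top'))
    ax.set_xticks(major_ticks)
    ax.set_xticks(minor_ticks, minor=True)
    ax.set_ylabel('Mixing Ratio (ppbv)')
    ax.set_title(title)
    ax.legend()
    fig.savefig(filename, dpi=150)
    plt.close(fig)
    return filename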