def get_standard_quants(name, string=False, set_=True, session=None):
    """
    Get the quantification objects or string names of the compounds for a given standard.

    By default, returns a frozen set for performance. In general, this query is used to check string membership, making
    an immutable set highly desirable.

    :param str name: name of the standard to search for
    :param bool string: if True, return string names of each quantification instead of object, else return Quant objects
    :param bool set_: if True, return the result as a frozenset, otherwise returns tuple
    :param Session session: an active Sqlalchemy session
    :return tuple | frozenset: resulting objects or string names
    """

    if not session:
        _, session = connect_to_db(DB_NAME, CORE_DIR)

    std = session.query(Standard).filter(Standard.name == name).one()

    if string:
        quants = [q.name for q in std.quantifications]
    else:
        quants = list(std.quantifications)

    return frozenset(quants) if set_ else tuple(quants)
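
# A minimal usage sketch, assuming the database contains a standard named 'quantlist'
# (it is queried elsewhere in this project); 'CFC-11' is a hypothetical compound name.
_, session = connect_to_db(DB_NAME, CORE_DIR)
quant_names = get_standard_quants('quantlist', string=True, session=session)

if 'CFC-11' in quant_names:  # frozenset membership checks are O(1)
    print('CFC-11 is quantified by this standard')

session.close()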
def test_blank_subtract_mixin():

    engine, session = connect_to_db(DB_NAME, CORE_DIR)

    runs = session.query(GcRun).filter(GcRun.quantified == True).all()[:100:4]  # every 4th of the first 100 runs

    for run in runs:
        run.blank_subtract(session=session)
Example #3
def plot_history():
    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
    except Exception:
        return False

    compounds_to_plot = (session.query(Quantification.name).join(
        Standard, Quantification.standard_id == Standard.id).filter(
            Standard.name == 'quantlist').all())

    compounds_to_plot[:] = [q.name for q in compounds_to_plot]

    date_limits, major_ticks, minor_ticks = create_monthly_ticks(
        84, days_per_minor=0, start=datetime(2013, 1, 1))

    major_ticks = major_ticks[::4]

    with PLOT_INFO.open('r') as file:
        compound_limits = json.loads(file.read())

    for name in compounds_to_plot:

        old_results = (session.query(OldData.date, OldData.mr).filter(
            OldData.name == name).order_by(OldData.date).all())

        dates = [o.date for o in old_results]
        mrs = [o.mr for o in old_results]

        filtered_file = CORE_DIR / f'DataSelectors/FinalDataSelector/data/{name}_filtered.json'
        with filtered_file.open('r') as f:
            new_final_data = json.load(f)
            new_final_data = json.load(f)

        dates.extend([
            datetime.fromtimestamp(n['date'], tz=timezone(timedelta(hours=1)))
            for n in new_final_data
        ])
        mrs.extend([n['mr'] for n in new_final_data])

        p = MixingRatioPlot({name: [dates, mrs]},
                            limits={
                                **date_limits,
                                **compound_limits[name]
                            },
                            major_ticks=major_ticks,
                            minor_ticks=minor_ticks,
                            filepath=PLOTDIR / f'{name}_plot.png')

        p.plot()

    session.commit()
    session.close()
    engine.dispose()
def filter_for_new_entities(objs, orm_class, attr, session=None):
    """
    Filter a list of sqlalchemy class instances for only those whose attribute attr are not already in the databse.

    The provided list of objects is checked against the database by returning only objects whose specified attribute
    does not match any entry in the database for that class. Frequently used with paths or dates to determine what
    unique data to add to and commit to the database.

    :param Sequence objs: sequence of objects to check against the database
    :param orm_class: Declared class of the provided objects
    :param str attr: Attribute of the orm class to compare against the database
        **IntegrityErrors are completely possible if attr is not actually a unique-constrained attribute of orm_class
    :param Session session: an active sqlalchemy session to use; created and closed internally if not given
    :return list: potentially empty list of all objs that are not already present in the database
    :raises TypeError: if session is not a sqlalchemy session, or the orm_class is not a declared sqlalchemy class
    """
    if type(orm_class) is not type(Base):
        msg = 'orm_class must be a declared sqlalchemy class'
        raise TypeError(msg)

    if not session:
        _, session = connect_to_db(DB_NAME, CORE_DIR)
        close_on_exit = True
    else:
        if not isinstance(session, Session):
            msg = 'Provided session must be an active sqlalchemy session'
            raise TypeError(msg)
        close_on_exit = False

    obj_attrs = [getattr(o, attr) for o in objs]
    obj_attr_sets = split_into_sets_of_n(obj_attrs, 750)  # chunk to stay under SQLite's variable limit of 1000

    objs_in_db = []
    for set_ in obj_attr_sets:
        objs_in_db.extend(
            session.query(orm_class).filter(getattr(orm_class, attr).in_(set_)).all()
        )

    attrs_in_db = {getattr(e, attr) for e in objs_in_db}

    new_objs = []
    for obj in objs:
        if getattr(obj, attr) not in attrs_in_db:
            new_objs.append(obj)  # add new object to return list
            # adding the new obj attr to the check-against list is required to prevent duplicates in one batch
            attrs_in_db.add(getattr(obj, attr))

    if close_on_exit:
        session.close()

    return new_objs
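
# A hedged sketch of the pattern described in the docstring: filter incoming objects on a
# unique attribute before committing. LogFile comes from this project's models, but the
# 'path' attribute and the incoming_paths list are assumptions made for illustration.
_, session = connect_to_db(DB_NAME, CORE_DIR)
incoming = [LogFile(path=p) for p in incoming_paths]

new_logs = filter_for_new_entities(incoming, LogFile, 'path', session=session)
session.add_all(new_logs)  # only objects whose path was not already in the database
session.commit()
session.close()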
Example #5
def test_basic_data_types():
    _, session = connect_to_db(DB_NAME, CORE_DIR)

    for cls in (Compound, LogFile, Daily, DailyFile, Integration, GcRun,
                OldData, Quantification, Standard):
        instances = session.query(cls).limit(5).all()
        print(f'\n{cls.__name__}: ')
        for instance in instances:
            print(repr(instance))

    runs = session.query(GcRun).limit(5).all()
    quant = SampleQuant(*runs)

    print(repr(quant))
Example #6
def plot_blank_data(logger):
    logger.info('Running plot_blank_data()')

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the database in plot_blank_data()')
        return False

    compounds_to_plot = (session.query(Quantification.name)
                         .join(Standard, Quantification.standard_id == Standard.id)
                         .filter(Standard.name == 'quantlist').all())
    compounds_to_plot[:] = [q.name for q in compounds_to_plot]

    date_limits, major_ticks, minor_ticks = create_monthly_ticks(6)

    with open(JSON_PUBLIC_DIR / 'zug_plot_info.json', 'r') as file:
        compound_limits = json.loads(file.read())

    for name in compounds_to_plot:
        results = (session.query(Compound.mr, Integration.date)
                          .join(Integration, Integration.id == Compound.integration_id)
                          .join(GcRun, GcRun.id == Integration.run_id)
                          .filter(Integration.date >= datetime(2018, 3, 1))
                          .filter(GcRun.type == 0)
                          .filter(Compound.name == name)
                          .order_by(Integration.date)
                          .all())

        dates = [r.date for r in results]
        mrs = [r.mr for r in results]

        p = MixingRatioPlot(
            {name: [dates, mrs]},
            limits={**date_limits, 'bottom': 0, 'top': compound_limits.get(name).get('top') * .10},
            # plotting from 0 to 10% of the max value for each compound for good blank scaling
            major_ticks=major_ticks,
            minor_ticks=minor_ticks,
            filepath=PLOT_DIR / f'{name}_plot.png'
        )

        p.plot()

    session.commit()
    session.close()
    engine.dispose()

    return True
def get_query(params, filters):
    """
    Create a query with only a list of parameters to grab, and filters to apply after

    Use SQLalchemy internals to poke around and get classes, etc
    """
    engine, session = connect_to_db(DB_NAME, CORE_DIR)

    q = session.query(*params)  # kick off the query

    classes = []
    for p in params:
        parent_class = p.parent.class_
        if parent_class not in classes:
            classes.append(parent_class)  # a list keeps insertion order, unlike a set

    base = classes.pop(0)  # grab first class from list; it is inherently already in the join-chain

    if classes:  # any more classes?
        for c in classes:
            relations_for_c = relations.get(c.__name__)

            if not relations_for_c:
                msg = f'{c.__name__} does not have any defined relationships.'
                raise NotImplementedError(msg)

            relation = relations_for_c.get(base.__name__)

            if relation:
                q = q.join(c, relation.key == relation.fkey)
            else:
                msg = f'{c.__name__} is not directly related to {base.__name__} in the schema.'
                raise NotImplementedError(msg)

    for f in filters:
        q = q.filter(f)

    return q.all()[:10]  # cap output at the first ten results
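
# A hedged example call; the Compound-to-GcRun relationship is assumed to be defined in the
# relations metadata, and GcRun.type == 1 is the ambient-run filter used elsewhere in this
# project. 'CFC-11' is a hypothetical compound name.
results = get_query(
    [Compound.mr, GcRun.date],  # parent classes are discovered and joined automatically
    [GcRun.type == 1, Compound.name == 'CFC-11'],  # filters, applied in order
)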
import json
import statistics as s

from datetime import datetime
from calendar import monthrange

import pandas as pd

from settings import CORE_DIR, JSON_PUBLIC_DIR, DB_NAME
from IO.db import connect_to_db, GcRun, Compound, Standard, final_data_first_sample_only_filter
from plotting import create_daily_ticks, create_monthly_ticks, AnnotatedResponsePlot

with open(JSON_PUBLIC_DIR / 'zug_plot_info.json', 'r') as file:
    compound_limits = json.loads(file.read())

engine, session = connect_to_db(DB_NAME, CORE_DIR)

start_date = datetime(2018, 3, 1)
end_date = datetime(2018, 3, 1)

date_ranges = pd.period_range(start_date, end_date, freq='1M')

standard = (session.query(Standard).filter(Standard.name == 'quantlist').one())

compounds = [q.name for q in standard.quantifications]

BASE_PLOT_DIR = CORE_DIR / 'analyses/quality_control/preliminary_final_plots/plots'

if not BASE_PLOT_DIR.exists():
    BASE_PLOT_DIR.mkdir()
def abstract_query(params, filters, order=None, session=None):
    """
    Make a query for one or many parameters with no knowledge of the project structure needed.

    Easy query abstracts to the project layer and allows one to ignore the internal structure to a large degree. Making
    a query for cross-class attributes is as easy as asking for [Integration.filename, Integration.date, LogFile.date].
    A normal query would require a query for all three attributes and an explicit join of
    .join(LogFile, LogFile.integration_id == LogFile.id), but easy_query handles this internally by referencing metadata
    that's created after the models are defined, thus it will handle changes in the schema as well.

    :param Sequence[InstrumentedAttribute | DeclarativeMeta] params: one or many parameters to be queried,
        params will be output in their requested order
    :param Sequence filters: one or many filter expressions to apply,
        eg [Integration.id != 1, Integration.filename.like('2019_%'), LogFile.date >= datetime(2019, 1, 1)];
        filters *must* be given in their intended order of application
    :param order: parameter to order results by
    :return list: returns list of named tuples results
    :raises NotImplementedError: if any class in params cannot be joined appropriately
    :raises ValueError: if order is not also in params
    """

    if not session:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
        close_on_exit = True
    else:
        close_on_exit = False

    q = session.query(*params)  # kick off the query

    classes = []
    for p in params:
        if isinstance(p, DeclarativeMeta):
            parent_class = p  # is a class; just add it
        else:
            parent_class = p.parent.class_  # is an attribute; need to add its parent class

        if parent_class not in classes:
            classes.append(parent_class)  # a list keeps insertion order, unlike a set

    base = classes.pop(0)  # grab first class from list

    if classes:  # any more classes?
        for c in classes:
            relations_for_c = relations.get(c.__name__)

            if not relations_for_c:
                msg = f'{c.__name__} does not have any defined relationships.'
                raise NotImplementedError(msg)

            relation = relations_for_c.get(base.__name__)

            if relation:
                q = q.join(c, relation.key == relation.fkey)
            else:
                msg = f'{c.__name__} is not directly related to {base.__name__} in the schema.'
                raise NotImplementedError(msg)

    for f in filters:
        q = q.filter(f)

    if order:
        if order.parent.class_ not in (base, *classes):
            msg = 'Order must be an InstrumentedAttribute and must also be in the queried parameters'
            raise ValueError(msg)

        q = q.order_by(order)

    if close_on_exit:
        session.close()

    return q.all()
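
# The docstring's own cross-class example, sketched as a call; assumes the Integration and
# LogFile relationship is defined in the relations metadata, as the docstring describes.
results = abstract_query(
    [Integration.filename, Integration.date, LogFile.date],
    [Integration.filename.like('2019_%'), LogFile.date >= datetime(2019, 1, 1)],
    order=Integration.date,
)

for filename, integration_date, logfile_date in results:  # named tuples, in param order
    print(filename, integration_date, logfile_date)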
def get_df_with_filters(use_mrs, filters=None, compounds=None):
    """
    Retrieves a dataframe from the database of all mixing ratios or peak areas for all compounds with optional filters.

    Retrieves mixing ratios (or peak areas if use_mrs=False) for all compounds, and can be filtered with additional
    expressions. Expressions are added on a per-compound basis, so filtering for specific compounds is not yet possible.
    Returned DataFrame has a datetimeindex of GcRun dates, and a column per compound.

    :param bool use_mrs: Boolean specifying if mixing ratios should be returned. False will return peak areas instead
    :param Iterable filters: List containing Sqlalchemy filter expressions, eg [GcRun.type == 1, GcRun.quantified == 1]
        **Filters are added sequentially and should be given in their intended order
    :param Iterable compounds: list of compounds to query for, defaults to all quantified compounds if not given
    :return pd.DataFrame:
    """
    engine, session = connect_to_db(DB_NAME, CORE_DIR)

    if not compounds:
        standard = (
            session.query(Standard).filter(Standard.name == 'quantlist').one()
        )  # retrieve full list of compounds that are quantified from the database

        compounds = [q.name for q in standard.quantifications]

    # data will be unpacked into a dictionary of dates, where each date holds another OrderedDict of compounds:mrs
    dates = OrderedDict()

    mr_or_pa = Compound.mr if use_mrs else Compound.pa

    for compound in compounds:
        # get all ambient data that's not filtered
        results = (session.query(mr_or_pa, GcRun.date)
                   .join(GcRun, GcRun.id == Compound.run_id)
                   .filter(Compound.filtered == False)
                   .filter(Compound.name == compound))

        if filters:
            for expression in filters:
                results = results.filter(expression)

        results = results.order_by(GcRun.date).all()

        #  append to or create the entry with the results for that compound and date
        for r in results:
            try:
                dates[r.date][compound] = r[0]  # add the compound as the key and mr/pa as the value
            except KeyError:
                dates[r.date] = OrderedDict([(compound, r[0])])  # date doesn't exist yet; create its ODict

    # re-sort based on date, for safety
    dates = OrderedDict(sorted(dates.items(), key=lambda kv_pair: kv_pair[0]))

    for date, results in dates.items():
        dates[date] = OrderedDict(
            sorted(
                results.items(),
                # sort by key-value pair, using the key's index in the list of compounds to determine their order
                key=lambda kv_pair: compounds.index(kv_pair[0])))

    session.close()
    engine.dispose()

    return pd.DataFrame.from_dict(dates, orient='index')
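
# A hedged usage sketch with the docstring's example filters: ambient (type == 1) and
# quantified runs only. The weekly-median resample is illustrative, not project code.
df = get_df_with_filters(use_mrs=True, filters=[GcRun.type == 1, GcRun.quantified == 1])
weekly_medians = df.resample('1W').median()  # valid because the index is a DatetimeIndex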
"""
The filter file 2019_10_30_filters.json was somehow created with a time offset of -3 hours from Zugspitze time.

This fixes that, creating a _fixed file from the original.
"""
import os
import json
import datetime as dt

from datetime import datetime
from pathlib import Path

from settings import CORE_DIR, DB_NAME
from IO.db import connect_to_db, GcRun

engine, session = connect_to_db(DB_NAME, CORE_DIR)  # use the database to verify all 'fixed' dates

file = Path(os.getcwd()) / '2019_10_30_filters.json'

data = json.loads(file.read_text())

new_dict = {}
proof = {}  # helper dict to show conversion

for k, v in data.items():
    date = datetime.strptime(k[:16], '%Y-%m-%d %H:%M')
    date += dt.timedelta(hours=3)

    gc_run = (session.query(GcRun)
              .filter(GcRun.date >= date, GcRun.date <= date + dt.timedelta(minutes=1))
              .one_or_none())  # search within one minute of date