Example #1
0
def update_years():
    """
    Create (if necessary) and (re)fill the helper table ``years``.

    The table compensates for gaps in the DWD data, where no readings are available: it is suitable
    to left outer join yearly aggregates from the reading tables to it. It holds one row per year
    from 1700 to 2050 with the number of days of that year; for the current year only the days up
    to and including today are counted.

    :return: None
    """
    # Take exactly one snapshot of "today". Calling date.today() repeatedly could mix two different
    # years when the function happens to run across New Year's midnight, which would yield a day
    # count of 366 or 367 for the old year.
    today = date.today()

    def days(year):
        """Return the number of days in `year`, cut off at today for the current year."""
        last = today if year == today.year else date(year, 12, 31)
        return (last - date(year, 1, 1)).days + 1

    with johanna.Connection(text="create? table years") as c:
        c.cur.executescript("""
            CREATE TABLE IF NOT EXISTS years (
                year INTEGER,
                days INTEGER,
                PRIMARY KEY (year)
            );
        """)
    # TODO years interval could be retrieved from the stations table
    # TODO could be optimized a little bit to not insert when first year in range is already there and last one is ok
    years = [(y, days(y)) for y in range(1700, 2051)]
    with johanna.Connection(text=f"insert? {len(years)} years") as c:
        # INSERT OR REPLACE keeps the call repeatable: existing years are simply overwritten
        c.cur.executemany("INSERT OR REPLACE INTO years VALUES (?, ?)", years)
        c.commit()
Example #2
0
def _upsert(rows: List[tuple]) -> None:
    """
    Write the given station rows into the ``stations`` table.

    First runs the SQL_CREATE_STATIONS script (presumably the DDL of the table -- confirm), then
    writes all rows with INSERT OR REPLACE and commits. The elapsed time is logged at the end.

    :param rows: one tuple per station; the insert statement binds 14 values per tuple
    """
    insert_sql = """
                INSERT OR REPLACE INTO stations
                VALUES (?,?,?,?,?,?,?,?, ?,?,?,?,?,?)
            """
    with johanna.Timer() as timer:
        # the database itself is supplied by johanna
        with johanna.Connection(text="create? table stations") as ddl_conn:
            ddl_conn.cur.executescript(SQL_CREATE_STATIONS)
        with johanna.Connection("insert stations") as dml_conn:
            # for the conflict handling see https://database.guide/how-on-conflict-works-in-sqlite/
            dml_conn.cur.executemany(insert_sql, rows)
            dml_conn.commit()
    logging.info(f"Upserted {len(rows)} stations to the database  {timer.read()}")
Example #3
0
    def __init__(self, station):
        """
        Load master data and load state of one station from the database.

        Reads name, land and the von/bis dates from ``stationen``, the last loaded timestamp from
        ``recent`` and the maximum ``dwdts`` from ``readings``. When the station is found,
        ``populated`` is True and all attributes are filled; otherwise ``populated`` is False, the
        data attributes are None and ``description`` is just the station key.

        :param station: station key that is bound to ``where s.station = ?``
        :raises AssertionError: when the timestamp in ``recent`` and the maximum timestamp in
            ``readings`` disagree
        """
        # select < 0.6 millis :)
        sql = """select 
                name, land, yyyymmdd_von, yyyymmdd_bis, 
                ifnull(rc.yyyymmddhh, '1700010100'),
                ifnull(max(rd.dwdts), '1700010100') 
            from stationen s
            left outer join recent rc on s.station = rc.station
            left join readings rd on s.station = rd.station
            where s.station = ?"""

        self.station = station
        with johanna.Connection(f"Station.__init__({station})") as c:
            c.cur.execute(sql, (station, ))
            row = c.cur.fetchone()
        # max() turns this into an aggregate query without GROUP BY. SQLite returns exactly one row
        # for such a query even when no station matches -- the station columns are all NULL then.
        # So a row alone does not prove that the station exists; look at its content as well.
        found = bool(row) and any(col is not None for col in row[:4])
        if found:
            self.name = row[0]
            self.land = row[1]
            self.isodate_von = row[2]
            self.isodate_bis = row[3]
            self.dwdts_recent = row[4]  # from the recent table
            self.dwdts_readings = row[5]  # from the readings data
            # both sources have to agree, otherwise the bookkeeping in recent is broken
            assert self.dwdts_recent == self.dwdts_readings, \
                f"recent: {self.dwdts_recent} vs. Daten: {self.dwdts_readings}"
            self.populated = True
            self.description = f"{self.station}, {self.name} ({LAND_MAP[self.land]})"
            logging.info(
                f"{self.description}: {self.isodate_von}..{self.isodate_bis} "
                f"rc={self.dwdts_recent} rd={self.dwdts_readings}")
        else:
            self.populated = False
            # Define all attributes anyway, so that code which reads e.g. description without
            # checking populated first does not fail with an AttributeError.
            self.name = None
            self.land = None
            self.isodate_von = None
            self.isodate_bis = None
            self.dwdts_recent = None
            self.dwdts_readings = None
            self.description = f"{self.station}"
Example #4
0
    def __init__(self, ftp: FTP, fnam: str, verbose: bool = False):
        """
        Download one data file of a station and store its readings in the database.

        The station number is taken from the file name. When is_data_expected() reports that the
        file is worth downloading, it is fetched into a temporary directory, extracted and parsed;
        readings found are inserted and the "recent" bookkeeping is updated in one transaction.

        :param ftp: open FTP connection with the correct working directory
        :param fnam: name of the file to download
        :param verbose: console output as progress info -- DO NOT USE IN PRODUCTION
            (stored in self._verbose; the download call itself passes verbose=True literally)
        """
        self._verbose = verbose
        self.did_download = False
        logging.info(f'DataFile(_,"{fnam}")')

        # third "_"-separated part of the base name; fortunately this works for both the
        # historical and the recent file names
        base_name = fnam.split(".")[0]
        station_nr = int(base_name.split("_")[2])
        self.station = Station(station_nr)
        logging.info(
            f"Station {self.station.description} (Daten bis {self.station.dwdts_recent} bereits vorhanden)"
        )

        # guard clause: nothing to do when no new data can be expected
        if not is_data_expected(fnam, self.station):
            logging.info(
                f"File {fnam} wird nicht heruntergeladen, da keine neuen Daten zu erwarten sind."
            )
            return

        with johanna.Timer() as timer:
            with TemporaryDirectory() as tmp_name:
                temp_dir = Path(tmp_name)
                logging.info(f"Temporäres Verzeichnis: {temp_dir}")
                target = temp_dir / fnam
                zipfile_path = ftplight.ftp_retrbinary(
                    ftp, from_fnam=fnam, to_path=target, verbose=True)
                if not zipfile_path:
                    johanna.flag_as_error()
                    logging.error(
                        f"Kann die Daten der Station {self.station.description} nicht herunterladen."
                    )
                    return
                produkt_path = self._extract(zipfile_path, temp_dir)
                readings = self._parse(produkt_path)
                if not readings:
                    logging.info(
                        f"Keine Werte für Station {self.station.description} nach {self.station.dwdts_recent} gefunden {timer.read()}"
                    )
                else:
                    # TODO protect the connection with a retry
                    with johanna.Connection("insert readings") as conn:
                        self._insert_readings(readings, conn)
                        last_date = self._update_recent(readings, conn)
                        conn.commit()  # one common commit for both writes makes sense
                    logging.info(
                        f"Werte für Station {self.station.description} bis {last_date} verarbeitet {timer.read()}"
                    )

        # the temporary directory should be gone after leaving its context manager
        if temp_dir.exists():
            johanna.flag_as_error()
            logging.error(
                f"Temporäres Verzeichnis {temp_dir} wurde NICHT entfernt")
Example #5
0
 def _upsert(self):
     """
     Write self.rows into the ``stationen`` table.

     All rows are written with INSERT OR REPLACE (8 values per row) and committed in one
     transaction. Afterwards self.cnt and the elapsed time are logged.
     """
     upsert_sql = """
                 INSERT OR REPLACE INTO stationen
                 VALUES (?,?,?,?,?,?,?,?)
             """
     with johanna.Timer() as timer, johanna.Connection("insert stationen") as conn:
         # for the conflict handling see https://database.guide/how-on-conflict-works-in-sqlite/
         conn.cur.executemany(upsert_sql, self.rows)
         conn.commit()
     message = f"{self.cnt} Stationen in die Datenbank geschrieben {timer.read()}"
     logging.info(message)
Example #6
0
def get_columns(tabnam: str = "readings") -> List[tuple]:
    """
    Return the column list of a table, reading it from the database only once per table.

    The result is kept in ``get_columns.buffer``, so repeated calls are cheap. The buffered list
    itself is handed out, not a copy -- callers must not modify it.

    :param tabnam: table name in current johanna database
    :return: list of tuples (colnam: str, type: str, primary_key: int)
    """
    cache = get_columns.buffer
    try:
        return cache[tabnam]
    except KeyError:
        pass  # not buffered yet, so ask the database below
    with johanna.Connection(f"columns of {tabnam}") as conn:
        columns = conn.cur.execute(SQL_COLUMNS, (tabnam, )).fetchall()
    cache[tabnam] = columns
    return cache[tabnam]
Example #7
0
def get_two(station: int,
            dwdts: str,
            tabname: str = "readings",
            fields: List[str] = None):
    """
    Fetch the first (at most two) rows of a station at or after a given timestamp.

    :param station: station key to select
    :param dwdts: lower bound for the ``dwdts`` column; any "-" characters are removed before use
    :param tabname: table to read from
    :param fields: columns to select in addition to ``dwdts``; when empty or None the list is
        taken from get_data_fields(tabname)
    :return: list of result rows ordered by ``dwdts``
    """
    if "-" in dwdts:
        dwdts = dwdts.replace("-", "")
    columns = fields or get_data_fields(tabname)
    column_list = ", ".join(columns)
    # NOTE: table and column names are interpolated into the statement (identifiers cannot be
    # bound as parameters), only station and dwdts are bound.
    sql = (f"select dwdts, {column_list} from {tabname} "
           "where station = ? and dwdts >= ? order by dwdts limit 2")
    with johanna.Connection(f"from dwdts = {dwdts}", quiet=True) as conn:
        cursor = conn.cur.execute(sql, (station, dwdts))
        result = cursor.fetchall()
    return result
Example #8
0
def get_missingdays(station: int,
                    tabname: str = "readings") -> Tuple[list, list]:
    """
    Run the "missing days" select for one station and return its result.

    The statement and the list of data fields both come from generate_missingdays_select(tabname);
    the station is bound to both placeholders of that statement.

    :param station: the station to assess
    :param tabname: name of the table where the readings are stored, defaults to "readings"
    :return: hit set with the data calculated and list of data fields (see example select)
    """
    sql, fields = generate_missingdays_select(tabname)
    params = (station, station)  # the statement takes the station twice
    with johanna.Connection("missing days") as conn:
        cursor = conn.cur.execute(sql, params)
        hits = cursor.fetchall()
    return hits, fields
Example #9
0
def overview(station: int,
             tabname: str = "readings",
             fields: List[str] = None,
             with_rows: bool = False) -> List[Timeframe]:
    """
    Condense the readings of a station into a list of time frames.

    The select generated by get_indicator_select() delivers one row per timestamp: the timestamp
    followed by string parts that are joined into an "indicator string". Consecutive timestamps
    with the same indicator string are merged into one Timeframe. When two timestamps are more
    than one step apart, a Timeframe with the indicator "no data" is inserted for the gap.

    :param station: station key to select
    :param tabname: table to read from
    :param fields: data fields to look at; when empty or None they come from get_data_fields()
    :param with_rows: when True, every Timeframe gets ``rows`` filled by get_two() starting at
        the end of the time frame
    :return: time frames in the order of the result set; an empty list when there is no row
    """
    assert isinstance(station, int)
    assert isinstance(tabname, str)
    if not fields:
        fields = get_data_fields(tabname=tabname)
    assert isinstance(fields, list)
    assert isinstance(with_rows, bool)

    sql = get_indicator_select(tabname=tabname, fields=fields)
    with johanna.Connection(f"select from {tabname}") as c:
        rows = c.cur.execute(sql, (station, )).fetchall()
    if not rows:
        # nothing stored for this station: there is no first row to start from
        return []

    tfs = []
    ts0 = PointInTime(rows[0][0])
    srow0 = "".join(rows[0][1:])  # indicator string
    tf = Timeframe(ts0, None, srow0, None, None)
    tfs.append(tf)
    for row in rows[1:]:
        ts = PointInTime(row[0])
        srow = "".join(row[1:])  # indicator string
        if ts - ts0 > 1:  # not next day
            # we passed an occurrence of '---------' ('-' only)
            #   -> insert n/a interval: [x, _, old] -> [x, ts0, old], [ts0+1, ts-1, n/a], [ts, _, new]
            tf.ts_to = ts0
            tfs.append(Timeframe(ts0.next(), ts.prev(), "no data", None, None))
            tf = Timeframe(ts, None, srow, None, None)
            tfs.append(tf)
        elif srow != srow0:
            # indicator changed without a gap: close the current frame and open a new one
            tf.ts_to = ts0
            tf = Timeframe(ts, None, srow, None, None)
            tfs.append(tf)
        ts0 = ts
        srow0 = srow
    # ts0 always holds the last timestamp seen. Unlike the loop variable it is also defined when
    # the result set has only one row and the loop body never runs.
    tf.ts_to = ts0
    for tf in tfs:
        tf.days = tf.ts_to - tf.ts_from + 1
    if with_rows:
        for tf in tfs:
            tf.rows = get_two(station,
                              tf.ts_to.dwdts(),
                              tabname=tabname,
                              fields=fields)
    return tfs
Example #10
0
 def __init__(self, station: Union[int, str]):
     """
     Load the master data of one station from the ``stations`` table.

     When the station is found, name, land_short, isodate_from, isodate_to and description are
     taken from the row and ``populated`` is True; otherwise these attributes are None and
     ``populated`` is False. The result is logged together with the elapsed time.

     :param station: station key; a string is converted to int first
     """
     station = int(station) if isinstance(station, str) else station
     self.station = station
     query = """select 
             name, land_short, isodate_from, isodate_to, description
         from stations
         where station = ?"""
     with johanna.Timer() as timer, johanna.Connection(f"Station.__init__({station})") as conn:
         conn.cur.execute(query, (station,))
         record = conn.cur.fetchone()
         # without a row all five data attributes become None
         values = record if record else (None,) * 5
         self.name, self.land_short, self.isodate_from, self.isodate_to, self.description = values
         self.populated = True if record else False
     logging.info(f"got {self.description}: {self.isodate_from}..{self.isodate_to}  {timer.read()}")
Example #11
0
#!/usr/bin/env python
# coding: utf-8
"""
Simple test program for johanna in interactive mode.
Creates and uses a ~/.johanna folder which can be disposed at will.

Created: 06.09.20
"""

import johanna

if __name__ == "__main__":
    johanna.interactive(dbname="hurz.sqlite")
    # johanna.apply_schema("./schema.sql")
    with johanna.Connection("Charlotte") as conn:
        # charlotte.py needs to have been run before (see original note "need to run charlotte.py")
        cursor = conn.cur
        cursor.execute("select * from kvpairs")
        for record in cursor.fetchall():
            print(record)
    # NOTE(review): the run is flagged as an error unconditionally -- presumably to exercise
    # johanna's error reporting in this test program; confirm.
    johanna.flag_as_error()
Example #12
0
def main():
    """
    Apply ./schema.sql, insert the pair (1, 'eins') into ``kvpairs`` and flag the run as error.

    The insert uses "insert or ignore" and is committed before the connection is left.
    """
    johanna.apply_schema("./schema.sql")
    statement = "insert or ignore into kvpairs(k, v) values (1, 'eins')"
    with johanna.Connection("Charlotte") as conn:
        conn.cur.execute(statement)
        conn.commit()
    # NOTE(review): flagged as error unconditionally -- presumably on purpose for testing; confirm
    johanna.flag_as_error()
Example #13
0
def plot(plt,
         station: int = const.MANNHEIM,
         monat: int = 6,
         stunde: int = 12,
         von: int = 0,
         bis: int = 3000) -> None:
    """
    Plot the yearly mean temperature of one month and hour for a station, with a linear trend.

    Selects ``avg(temp)`` per year from ``readings``, fits a linear regression through the yearly
    values and shows them as a scatter plot together with the trend line. When the selection
    yields no usable values, a warning is logged and nothing is plotted.

    :param plt: provided by the jupyter notebook:
            from matplotlib import pyplot as plt
            %matplotlib inline # <- that is why!
    :param station: numeric station key
    :param monat: month, 1 = January,...
    :param stunde: hour, 0..23
    :param von: first year (4 digits)
    :param bis: last year (4 digits)
    """
    with johanna.Timer() as overall:
        name = _get_station_name(station)

        with johanna.Timer() as timer:
            with johanna.Connection("select readings") as c:
                c.cur.execute(
                    '''
                    select year, avg(temp) val
                        from readings
                        where station = ?
                          and month = ?
                          and hour = ?
                          and year between ? and ?
                        group by year
                        order by year asc
                ''', (station, monat, stunde, von, bis))
                # avg() is NULL for a year without any non-NULL temp; such a year cannot be
                # used for the regression, so it is skipped
                rows = [row for row in c.cur if row[1] is not None]
            if not rows:
                # without data the regression and the access to the first/last year would fail
                logging.warning(
                    f"No readings for station {station}, month {monat}, hour {stunde}, "
                    f"years {von}..{bis} - nothing to plot")
                return
            x_db, y_db = _transpose(rows)
        logging.info(f"Select: {timer.read()}")

        # https://realpython.com/linear-regression-in-python/#simple-linear-regression-with-scikit-learn
        x = np.array(x_db).reshape((-1, 1))
        y = np.array(y_db)

        with johanna.Timer() as timer:
            model = LinearRegression().fit(x, y)
            logging.info(f"dT p.a.: {model.coef_}")
            x_pred = np.array([x_db[0], x_db[-1]]).reshape(
                (-1, 1))  # only the two ends, this is enough for a straight line
            y_pred = model.predict(x_pred)
        logging.info(f"LinearRegression: {timer.read()}")

        # https://towardsdatascience.com/linear-regression-using-python-b136c91bf0a2
        plt.rc('figure', figsize=(20.0, 10.0))
        plt.scatter(x, y, s=10, color='green', label="Einzelwerte")
        plt.xlabel('Jahr')
        plt.ylabel('Mitteltemperatur %d Uhr (UTC), %s, %s' %
                   (stunde, const.monat_as_string(monat), name))
        plt.plot(x_pred, y_pred, color='red', label="Trend")
        # https://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.legend
        plt.legend(loc=4)
        plt.show()

    logging.info(f"Overall: {overall.read()}")