Example #1
class DwdWeather:

    # DWD CDC HTTP server.
    baseuri = 'https://opendata.dwd.de/climate_environment/CDC'

    # Observations in Germany.
    germany_climate_uri = baseuri + '/observations_germany/climate/{resolution}'

    def __init__(self, resolution=None, category_names=None, **kwargs):

        # =================
        # Configure context
        # =================

        # Data set selector by resolution (hourly, 10_minutes).
        self.resolution = resolution

        # Categories of measurements.
        self.categories = self.resolve_categories(category_names)

        if "debug" in kwargs:
            self.debug = int(kwargs["debug"])
        else:
            self.debug = 0

        # Storage location
        cp = None
        if "cache_path" in kwargs:
            cp = kwargs["cache_path"]
        self.cache_path = self.get_cache_path(cp)

        # =================================
        # Acquire knowledgebase information
        # =================================

        # Database field definition
        knowledge = DwdCdcKnowledge.climate.get_resolution_by_name(
            self.resolution)
        self.fields = DwdCdcKnowledge.as_dict(knowledge)

        # Sanity checks
        if not self.fields:
            log.error(
                'No schema information for resolution "%s" found in knowledge base.',
                self.resolution)
            sys.exit(1)

        # =====================
        # Configure HTTP client
        # =====================
        self.cdc = DwdCdcClient(self.resolution, self.cache_path)

        # ========================
        # Configure cache database
        # ========================

        # Reset cache if requested
        if "reset_cache" in kwargs and kwargs["reset_cache"]:
            self.reset_cache()

        # Initialize
        self.init_cache()

    def resolve_categories(self, category_names):
        available_categories = deepcopy(DwdCdcKnowledge.climate.measurements)
        if category_names:
            # Materialize the filter so the categories can be iterated repeatedly.
            categories = list(filter(
                lambda category: category['name'] in category_names,
                available_categories))
        else:
            categories = available_categories
        return categories

    def dict_factory(self, cursor, row):
        """
        For emission of dicts from sqlite3
        """
        d = {}
        for idx, col in enumerate(cursor.description):
            d[col[0]] = row[idx]
        return d

    def get_cache_path(self, path):
        if path is None:
            home = os.path.expanduser("~") + os.sep + ".dwd-weather"
        else:
            home = path
        if not os.path.exists(home):
            os.makedirs(home)
        return home

    def get_cache_database(self):
        database_file = os.path.join(self.cache_path, APP_NAME + ".db")
        return database_file

    def reset_cache(self):
        database_file = self.get_cache_database()

        if os.path.exists(database_file):
            os.remove(database_file)

    def init_cache(self):
        """
        Creates ``.dwd-weather`` directory in the current
        user's home, where a cache database and config
        file will reside.
        """
        database_file = self.get_cache_database()
        self.db = sqlite3.connect(database_file)
        self.db.row_factory = self.dict_factory
        c = self.db.cursor()

        tablename = self.get_measurement_table()

        # Create measurement tables and index.
        create_fields = []
        for category in sorted(self.fields.keys()):
            for fieldname, fieldtype in self.fields[category]:
                create_fields.append("%s %s" % (fieldname, fieldtype))
        create = 'CREATE TABLE IF NOT EXISTS {table} (station_id int, datetime int, {sql_fields})'.format(
            table=tablename, sql_fields=",\n".join(create_fields))
        index = 'CREATE UNIQUE INDEX IF NOT EXISTS {table}_uniqueidx ON {table} (station_id, datetime)'.format(
            table=tablename)
        c.execute(create)
        c.execute(index)

        # Create station tables and index.
        tablename = self.get_stations_table()
        create = """
            CREATE TABLE IF NOT EXISTS {table}
            (
                station_id int,
                date_start int,
                date_end int,
                geo_lon real,
                geo_lat real,
                height int,
                name text,
                state text
            )""".format(table=tablename)
        index = 'CREATE UNIQUE INDEX IF NOT EXISTS {table}_uniqueidx ON {table} (station_id, date_start)'.format(
            table=tablename)
        c.execute(create)
        c.execute(index)

        self.db.commit()

    def import_stations(self):
        """
        Load station metadata from the DWD server.
        """
        for result in self.cdc.get_stations(self.categories):
            self.import_station(result.payload)

    def import_station(self, content):
        """
        Takes the content of one station metadata file
        and imports it into the database.
        """
        content = content.decode("latin1")
        content = content.strip()
        content = content.replace("\r", "")
        content = content.replace("\n\n", "\n")

        table = self.get_stations_table()

        insert_sql = """INSERT OR IGNORE INTO {table}
            (station_id, date_start, date_end, geo_lon, geo_lat, height, name, state)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)""".format(table=table)
        update_sql = """UPDATE {table}
            SET date_end=?, geo_lon=?, geo_lat=?, height=?, name=?, state=?
            WHERE station_id=? AND date_start=?""".format(table=table)
        cursor = self.db.cursor()
        #print content
        linecount = 0
        for line in content.split("\n"):
            linecount += 1
            line = line.strip()
            if line == "" or line == u'\x1a':
                continue
            #print linecount, line
            if linecount > 2:
                # first 7 fields
                parts = re.split(r"\s+", line, maxsplit=6)
                # separate name from Bundesland
                (name, bundesland) = parts[6].rsplit(" ", 1)
                name = name.strip()
                del parts[6]
                parts.append(name)
                parts.append(bundesland)
                #print parts
                for n in range(len(parts)):
                    parts[n] = parts[n].strip()
                station_id = int(parts[0])
                station_height = int(parts[3])
                station_lat = float(parts[4])
                station_lon = float(parts[5])
                station_start = int(parts[1])
                station_end = int(parts[2])
                station_name = parts[6]
                station_state = parts[7]
                # issue sql
                cursor.execute(
                    insert_sql,
                    (station_id, station_start, station_end, station_lon,
                     station_lat, station_height, station_name, station_state))
                cursor.execute(
                    update_sql,
                    (station_end, station_lon, station_lat, station_height,
                     station_name, station_state, station_id, station_start))
        self.db.commit()

    def import_measures(self, station_id, latest=True, historic=False):
        """
        Load data from DWD server.
        Parameter:

        station_id: e.g. 2667 (Köln-Bonn airport)

        latest: Load most recent data (True, False)
        historic: Load older values

        We download ZIP files for several categories
        of measures. We then extract one file from
        each ZIP. This path is then handed to the
        CSV -> Sqlite import function.
        """

        # Compute timerange labels / subfolder names.
        timeranges = []
        if latest:
            timeranges.append("recent")
        if historic:
            timeranges.append("historical")

        # Reporting.
        station_info = self.station_info(station_id)
        log.info("Downloading measurements for station %d" % station_id)
        log.info("Station information: %s" %
                 json.dumps(station_info, indent=2, sort_keys=True))

        # Download and import data.
        for category in self.categories:
            key = category['key']
            name = category['name'].replace('_', ' ')
            log.info('Downloading "{}" data ({})'.format(name, key))
            for result in self.cdc.get_measurements(station_id, category,
                                                    timeranges):
                # Import data for all categories.
                log.info(
                    'Importing measurements for station "{}" and category "{}"'
                    .format(station_id, category))
                #log.warning("No files to import for station %s" % station_id)
                self.import_measures_textfile(result)

    def datetime_to_int(self, timestamp):
        # Strip the ISO-style separators so the timestamp can be stored as an integer.
        return int(timestamp.replace('T', '').replace(':', ''))

    def get_measurement(self, station_id, date):
        tablename = self.get_measurement_table()
        sql = 'SELECT * FROM {tablename} WHERE station_id = ? AND datetime = ?'.format(
            tablename=tablename)

        c = self.db.cursor()

        result = []
        for row in c.execute(sql, (station_id, date)):
            result.append(row)

        if len(result) > 0:
            return result[0]
        else:
            return None

    def insert_measurement(self, tablename, fields, value_placeholders,
                           dataset):
        sql = 'INSERT INTO {tablename} ({fields}) VALUES ({value_placeholders})'.format(
            tablename=tablename,
            fields=', '.join(fields),
            value_placeholders=', '.join(value_placeholders))

        c = self.db.cursor()
        c.execute(sql, dataset)
        #self.db.commit()

    def update_measurement(self, tablename, sets, dataset):
        sql = 'UPDATE {tablename} SET {sets} WHERE station_id = ? AND datetime = ?'.format(
            tablename=tablename, sets=', '.join(sets))

        c = self.db.cursor()
        c.execute(sql, dataset)
        #self.db.commit()

    def import_measures_textfile(self, result):
        """
        Import content of source text file into database.
        """

        category_name = result.category['name']
        category_label = category_name.replace('_', ' ')
        if category_name not in self.fields:
            log.warning(
                'Importing "{}" data from "{}" not implemented yet'.format(
                    category_label, result.uri))
            return

        log.info('Importing "{}" data from "{}"'.format(
            category_label, result.uri))

        # Create SQL template
        tablename = self.get_measurement_table()
        fieldnames = []
        value_placeholders = []
        sets = []

        fields = list(self.fields[category_name])
        for fieldname, fieldtype in fields:
            sets.append(fieldname + "=?")

        fields.append(('station_id', 'str'))
        fields.append(('datetime', 'datetime'))

        for fieldname, fieldtype in fields:
            fieldnames.append(fieldname)
            value_placeholders.append('?')

        # Create data rows.
        count = 0
        items = result.payload.decode("latin-1").split("\n")
        for line in tqdm(items, ncols=79):
            count += 1
            line = line.strip()
            if line == "" or line == '\x1a':
                continue
            line = line.replace(";eor", "")
            parts = line.split(";")
            for n in range(len(parts)):
                parts[n] = parts[n].strip()

            if count > 1:

                # Parse station id.
                parts[0] = int(parts[0])

                # Parse timestamp, ignore minutes.
                # Fixme: Is this also true for resolution=10_minutes?
                parts[1] = int(parts[1].replace('T', '').replace(':', ''))

                dataset = []
                # station_id and datetime
                # For debugging purposes:
                # if category_name == "soil_temp":
                #     print(self.fields[category_name])
                #     print(parts)
                for n in range(2, len(parts)):
                    (fieldname,
                     fieldtype) = self.fields[category_name][(n - 2)]
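                    # DWD encodes missing values as -999; store those as NULL.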
                    if parts[n] == "-999":
                        parts[n] = None
                    elif fieldtype == "real":
                        parts[n] = float(parts[n])
                    elif fieldtype == "int":
                        try:
                            parts[n] = int(parts[n])
                        except ValueError:
                            sys.stderr.write(
                                "Error in converting field '%s', value '%s' to int.\n"
                                % (fieldname, parts[n]))
                            (t, val, trace) = sys.exc_info()
                            traceback.print_tb(trace)
                            sys.exit()
                    elif fieldtype == "datetime":
                        parts[n] = self.datetime_to_int(parts[n])

                    dataset.append(parts[n])

                # station_id and datetime for WHERE clause
                dataset.append(parts[0])
                dataset.append(parts[1])

                #log.debug('SQL template: %s', sql_template)
                #log.debug('Dataset: %s', dataset)

                if self.get_measurement(parts[0], parts[1]):
                    self.update_measurement(tablename, sets, dataset)
                else:
                    self.insert_measurement(tablename, fieldnames,
                                            value_placeholders, dataset)
        self.db.commit()

    def get_data_age(self):
        """
        Return age of latest dataset as ``datetime.timedelta``.
        """
        sql = "SELECT MAX(datetime) AS maxdatetime FROM %s" % self.get_measurement_table(
        )
        c = self.db.cursor()
        c.execute(sql)
        item = c.fetchone()
        if item["maxdatetime"] is not None:
            latest = datetime.strptime(
                str(item["maxdatetime"]), self.get_timestamp_format())
            return datetime.utcnow() - latest

    def get_stations_table(self):
        return 'stations_%s' % self.resolution

    def get_measurement_table(self):
        return 'measures_%s' % self.resolution

    def get_timestamp_format(self):
        knowledge = DwdCdcKnowledge.climate.get_resolution_by_name(
            self.resolution)
        return knowledge.__timestamp_format__

    def query(self, station_id, timestamp, recursion=0):
        """
        Get values from cache.
        station_id: Numeric station ID
        timestamp: datetime object
        """
        if recursion < 2:
            sql = "SELECT * FROM %s WHERE station_id=? AND datetime=?" % self.get_measurement_table(
            )
            c = self.db.cursor()
            c.execute(
                sql,
                (station_id, timestamp.strftime(self.get_timestamp_format())))
            out = c.fetchone()
            if out is None:
                # cache miss
                age = (datetime.utcnow() - timestamp).total_seconds() / 86400
                if age < 360:
                    self.import_measures(station_id, latest=True)
                elif age >= 360 and age <= 370:
                    self.import_measures(station_id,
                                         latest=True,
                                         historic=True)
                else:
                    self.import_measures(station_id, historic=True)
                return self.query(station_id,
                                  timestamp,
                                  recursion=(recursion + 1))
            c.close()
            return out

    def haversine_distance(self, origin, destination):
        lon1, lat1 = origin
        lon2, lat2 = destination
        radius = 6371000  # meters

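        # Haversine formula: a = sin^2(dlat/2) + cos(lat1)*cos(lat2)*sin^2(dlon/2),
        # c = 2*atan2(sqrt(a), sqrt(1-a)), distance = radius * c
        # (great-circle distance on a sphere of Earth's mean radius).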
        dlat = math.radians(lat2 - lat1)
        dlon = math.radians(lon2 - lon1)
        a = math.sin(dlat/2) * math.sin(dlat/2) + math.cos(math.radians(lat1)) \
            * math.cos(math.radians(lat2)) * math.sin(dlon/2) * math.sin(dlon/2)
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        d = radius * c
        return d

    def stations(self, historic=False):
        """
        Return list of dicts with all stations.
        """
        out = []
        table = self.get_stations_table()
        sql = """
            SELECT s2.*
            FROM {table} s1
            LEFT JOIN {table} s2 ON (s1.station_id=s2.station_id AND s1.date_end=s1.date_end)
            GROUP BY s1.station_id""".format(table=table)
        c = self.db.cursor()
        for row in c.execute(sql):
            out.append(row)
        c.close()
        if len(out) == 0:
            # cache miss - have to import stations.
            self.import_stations()
            out = self.stations()
        return out

    def station_info(self, station_id):
        table = self.get_stations_table()
        sql = "SELECT * FROM {table} WHERE station_id=?".format(table=table)
        c = self.db.cursor()
        c.execute(sql, (station_id, ))
        return c.fetchone()

    def nearest_station(self, lon, lat):
        # Select the most current station datasets.
        closest = None
        closest_distance = float("inf")
        for station in self.stations():
            d = self.haversine_distance(
                (lon, lat), (station["geo_lon"], station["geo_lat"]))
            if d < closest_distance:
                closest = station
                closest_distance = d
        return closest

    def stations_geojson(self):
        out = {"type": "FeatureCollection", "features": []}
        for station in self.stations():
            out["features"].append({
                "type": "Feature",
                "properties": {
                    "id": station["station_id"],
                    "name": station["name"]
                },
                "geometry": {
                    "type": "Point",
                    "coordinates": [station["geo_lon"], station["geo_lat"]]
                }
            })
        return json.dumps(out)

    def stations_csv(self, delimiter=","):
        """
        Return stations list as CSV.
        """
        csvfile = StringIO()
        # assemble field list
        headers = [
            "station_id", "date_start", "date_end", "geo_lon", "geo_lat",
            "height", "name"
        ]
        writer = csv.writer(csvfile,
                            delimiter=delimiter,
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(headers)
        stations = self.stations()
        for station in stations:
            row = []
            for n in range(len(headers)):
                val = station[headers[n]]
                if val is None:
                    val = ""
                elif isinstance(val, int):
                    val = str(val)
                elif isinstance(val, float):
                    val = "%.4f" % val
                row.append(val)
            writer.writerow(row)
        contents = csvfile.getvalue()
        csvfile.close()
        return contents
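
A minimal usage sketch for this variant (a hypothetical session; it assumes the module-level imports and helper classes referenced above, such as DwdCdcClient and DwdCdcKnowledge, are in place):

    from datetime import datetime

    dwd = DwdWeather(resolution="hourly")

    # Find the station closest to Cologne and query one hourly measurement.
    station = dwd.nearest_station(lon=6.96, lat=50.94)
    result = dwd.query(station_id=station["station_id"],
                       timestamp=datetime(2019, 6, 1, 12))
    print(result)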
Example #2
class DwdWeather:

    # DWD CDC HTTP server.
    baseuri = "https://opendata.dwd.de/climate_environment/CDC"

    # Observations in Germany.
    germany_climate_uri = baseuri + "/observations_germany/climate/{resolution}"

    def __init__(self, resolution="hourly", category_names=None, **kwargs):

        # =================
        # Configure context
        # =================

        # Data set selector by resolution (daily, hourly, 10_minutes).
        self.resolution = resolution

        # Categories of measurements.
        self.categories = self.resolve_categories(category_names)

        if "debug" in kwargs:
            self.debug = int(kwargs["debug"])
        else:
            self.debug = 0

        # Storage location
        cp = None
        if "cache_path" in kwargs:
            cp = kwargs["cache_path"]
        self.cache_path = self.get_cache_path(cp)

        # =================================
        # Acquire knowledgebase information
        # =================================

        # Database field definition
        knowledge = DwdCdcKnowledge.climate.get_resolution_by_name(self.resolution)
        self.fields = DwdCdcKnowledge.as_dict(knowledge)

        # Sanity checks
        if not self.fields:
            log.error(
                'No schema information for resolution "%s" found in knowledge base.',
                self.resolution,
            )
            sys.exit(1)

        # =====================
        # Configure HTTP client
        # =====================
        self.cdc = DwdCdcClient(self.resolution, self.cache_path)

        # ========================
        # Configure cache database
        # ========================

        # Reset cache if requested
        if "reset_cache" in kwargs and kwargs["reset_cache"]:
            self.reset_cache()

        # Initialize
        self.init_cache()

    def resolve_categories(self, category_names):
        available_categories = deepcopy(DwdCdcKnowledge.climate.measurements)
        if category_names:
            # Materialize the filter so the categories can be iterated repeatedly.
            categories = list(
                filter(
                    lambda category: category["name"] in category_names,
                    available_categories,
                )
            )
        else:
            categories = available_categories
        return categories

    def dict_factory(self, cursor, row):
        """
        For emission of dicts from sqlite3
        """
        d = {}
        for idx, col in enumerate(cursor.description):
            d[col[0]] = row[idx]
        return d

    def get_cache_path(self, path):
        if path is None:
            home = os.path.expanduser("~") + os.sep + ".dwd-weather"
        else:
            home = path
        if not os.path.exists(home):
            os.makedirs(home)
        return home

    def get_cache_database(self):
        database_file = os.path.join(self.cache_path, APP_NAME + ".db")
        return database_file

    def reset_cache(self):
        database_file = self.get_cache_database()

        if os.path.exists(database_file):
            os.remove(database_file)

    def init_cache(self):
        """
        Creates ``.dwd-weather`` directory in the current
        user's home, where a cache database and config
        file will reside.
        """

        database_file = self.get_cache_database()
        log.info('Using cache database {}'.format(database_file))

        self.db = sqlite3.connect(database_file)

        # Enable debugging.
        #self.db.set_trace_callback(print)
        #self.db.set_trace_callback(None)

        self.db.row_factory = self.dict_factory
        c = self.db.cursor()

        tablename = self.get_measurement_table()

        # Create measurement tables and index.
        create_fields = []
        for category in sorted(self.fields.keys()):
            for fieldname, fieldtype in self.fields[category]:
                create_fields.append("%s %s" % (fieldname, fieldtype))
        create = "CREATE TABLE IF NOT EXISTS {table} (station_id int, datetime int, {sql_fields})".format(
            table=tablename, sql_fields=",\n".join(create_fields)
        )
        index = "CREATE UNIQUE INDEX IF NOT EXISTS {table}_uniqueidx ON {table} (station_id, datetime)".format(
            table=tablename
        )
        c.execute(create)
        c.execute(index)

        # Create station tables and index.
        tablename = self.get_stations_table()
        create = """
            CREATE TABLE IF NOT EXISTS {table}
            (
                station_id int,
                date_start int,
                date_end int,
                geo_lon real,
                geo_lat real,
                height int,
                name text,
                state text
            )""".format(
            table=tablename
        )
        index = "CREATE UNIQUE INDEX IF NOT EXISTS {table}_uniqueidx ON {table} (station_id, date_start)".format(
            table=tablename
        )
        c.execute(create)
        c.execute(index)

        self.db.commit()

    def import_stations(self):
        """
        Load station metadata from the DWD server.
        """
        for result in self.cdc.get_stations(self.categories):
            self.import_station(result.payload)

    def import_station(self, content):
        """
        Takes the content of one station metadata file
        and imports it into the database.
        """
        content = content.decode("latin1")
        content = content.strip()
        content = content.replace("\r", "")
        content = content.replace("\n\n", "\n")

        table = self.get_stations_table()

        insert_sql = """INSERT OR IGNORE INTO {table}
            (station_id, date_start, date_end, geo_lon, geo_lat, height, name, state)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)""".format(
            table=table
        )
        update_sql = """UPDATE {table}
            SET date_end=?, geo_lon=?, geo_lat=?, height=?, name=?, state=?
            WHERE station_id=? AND date_start=?""".format(
            table=table
        )
        cursor = self.db.cursor()
        # print content
        linecount = 0
        for line in content.split("\n"):
            linecount += 1
            line = line.strip()
            if line == "" or line == u"\x1a":
                continue
            # print linecount, line
            if linecount > 2:
                # first 7 fields
                parts = re.split(r"\s+", line, maxsplit=6)
                # separate name from Bundesland
                (name, bundesland) = parts[6].rsplit(" ", 1)
                name = name.strip()
                del parts[6]
                parts.append(name)
                parts.append(bundesland)
                # print parts
                for n in range(len(parts)):
                    parts[n] = parts[n].strip()
                station_id = int(parts[0])
                station_height = int(parts[3])
                station_lat = float(parts[4])
                station_lon = float(parts[5])
                station_start = int(parts[1])
                station_end = int(parts[2])
                station_name = parts[6]
                station_state = parts[7]
                # issue sql
                cursor.execute(
                    insert_sql,
                    (
                        station_id,
                        station_start,
                        station_end,
                        station_lon,
                        station_lat,
                        station_height,
                        station_name,
                        station_state,
                    ),
                )
                cursor.execute(
                    update_sql,
                    (
                        station_end,
                        station_lon,
                        station_lat,
                        station_height,
                        station_name,
                        station_state,
                        station_id,
                        station_start,
                    ),
                )
        self.db.commit()

    def import_measures(self, station_id, current=False, latest=False, historic=False):
        """
        Load data from DWD server.
        Parameter:

        station_id: e.g. 2667 (Köln-Bonn airport)

        latest: Load most recent data (True, False)
        historic: Load older values

        We download ZIP files for several categories
        of measures. We then extract one file from
        each ZIP. This path is then handed to the
        CSV -> Sqlite import function.
        """

        # Compute timerange labels / subfolder names.
        timeranges = []
        if current:
            timeranges.append("now")
        if latest:
            timeranges.append("recent")
        if historic:
            timeranges.append("historical")

        # Reporting.
        station_info = self.station_info(station_id)
        log.info("Downloading measurements for station %d and timeranges %s" % (station_id, timeranges))
        log.info(
            "Station information: %s"
            % json.dumps(station_info, indent=2, sort_keys=True)
        )

        # Download and import data.
        for category in self.categories:
            key = category["key"]
            name = category["name"].replace("_", " ")
            log.info('Downloading "{}" data ({})'.format(name, key))
            for result in self.cdc.get_measurements(station_id, category, timeranges):
                # Import data for all categories.
                log.info(
                    'Importing measurements for station "{}" and category "{}"'.format(
                        station_id, category
                    )
                )
                # log.warning("No files to import for station %s" % station_id)
                self.import_measures_textfile(result)

    def datetime_to_int(self, timestamp):
        # Strip the ISO-style separators so the timestamp can be stored as an integer.
        return int(timestamp.replace("T", "").replace(":", ""))

    def get_measurement(self, station_id, date):
        tablename = self.get_measurement_table()
        sql = "SELECT * FROM {tablename} WHERE station_id = ? AND datetime = ?".format(
            tablename=tablename
        )

        c = self.db.cursor()

        result = []
        for row in c.execute(sql, (station_id, date)):
            result.append(row)

        if len(result) > 0:
            return result[0]
        else:
            return None

    def insert_measurement(self, tablename, fields, value_placeholders, dataset):
        sql = "INSERT INTO {tablename} ({fields}) VALUES ({value_placeholders})".format(
            tablename=tablename,
            fields=", ".join(fields),
            value_placeholders=", ".join(value_placeholders),
        )

        c = self.db.cursor()
        c.execute(sql, dataset)
        # self.db.commit()

    def update_measurement(self, tablename, sets, dataset):
        sql = "UPDATE {tablename} SET {sets} WHERE station_id = ? AND datetime = ?".format(
            tablename=tablename, sets=", ".join(sets)
        )

        c = self.db.cursor()
        c.execute(sql, dataset)
        # self.db.commit()

    def import_measures_textfile(self, result):
        """
        Import content of source text file into database.
        """

        category_name = result.category["name"]
        category_label = category_name.replace("_", " ")
        if category_name not in self.fields:
            log.warning(
                'Importing "{}" data from "{}" not implemented yet'.format(
                    category_label, result.uri
                )
            )
            return

        log.info('Importing "{}" data from "{}"'.format(category_label, result.uri))

        # Create SQL template.
        tablename = self.get_measurement_table()
        fieldnames = []
        value_placeholders = []
        sets = []

        fields = list(self.fields[category_name])
        for fieldname, fieldtype in fields:
            sets.append(fieldname + "=?")

        fields.append(("station_id", "str"))
        fields.append(("datetime", "datetime"))

        for fieldname, fieldtype in fields:
            fieldnames.append(fieldname)
            value_placeholders.append("?")

        # Create data rows.
        count = 0
        items = result.payload.decode("latin-1").split("\n")
        for line in tqdm(items, ncols=79):
            count += 1
            line = line.strip()
            if line == "" or line == "\x1a":
                continue
            line = line.replace(";eor", "")
            #print('Line:', line)

            parts = line.split(";")
            for n in range(len(parts)):
                parts[n] = parts[n].strip()

            if count > 1:

                # The first two fields are station id and timestamp in raw format.
                station_id_raw = parts.pop(0)
                timestamp_raw = parts.pop(0)

                # Parse station id.
                station_id = int(station_id_raw)

                # Parse timestamp.
                # FIXME: We should not store timestamps as integers, but rather as real datetimes.
                try:
                    timestamp_sanitized = timestamp_raw.replace("T", "").replace(":", "")

                    # If timestamp lacks minutes (like 2018112922),
                    # let's add them to make the datetime parser happy.
                    if len(timestamp_sanitized) == 10:
                        timestamp_sanitized += '00'

                    # Run the sanitized timestamp through the datetime parser
                    # and reformat it into the appropriate format.
                    timestamp_datetime = parsedate(timestamp_sanitized, ignoretz=True)
                    timestamp = int(timestamp_datetime.strftime(self.get_timestamp_format()))

                except Exception as ex:
                    log.error('Parsing timestamp "{}" failed: {}'.format(timestamp_sanitized, ex))
                    continue

                dataset = []

                # For debugging purposes.
                # if category_name == "soil_temp":
                #    print(self.fields[category_name])
                #    print(parts)

                for index, cell in enumerate(parts):
                    (fieldname, fieldtype) = self.fields[category_name][index]
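                    # DWD encodes missing values as -999; store those as NULL.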
                    if cell == "-999":
                        cell = None
                    elif fieldtype == "real":
                        cell = float(cell)
                    elif fieldtype == "int":
                        try:
                            cell = int(float(cell))
                        except ValueError:
                            sys.stderr.write(
                                "Error in converting field '%s', value '%s' to int.\n"
                                % (fieldname, cell)
                            )

                            # FIXME: Try to be more graceful here.
                            (t, val, trace) = sys.exc_info()
                            traceback.print_tb(trace)
                            sys.exit(2)

                    elif fieldtype == "datetime":
                        cell = self.datetime_to_int(cell)

                    dataset.append(cell)

                # "station_id" and "datetime" should go into the last
                # two slots of the SQL template to be interpolated
                # as constraints into the WHERE clause.
                dataset.append(station_id)
                dataset.append(timestamp)

                #print('Parts:', parts)
                #print('Dataset:', dataset)

                if self.get_measurement(station_id, timestamp):
                    self.update_measurement(tablename, sets, dataset)
                else:
                    self.insert_measurement(
                        tablename, fieldnames, value_placeholders, dataset
                    )

                # Commit in batches.
                if count % 500 == 0:
                    self.db.commit()

        # Commit all data.
        self.db.commit()

    def get_data_age(self):
        """
        Return age of latest dataset as ``datetime.timedelta``.
        """
        sql = (
            "SELECT MAX(datetime) AS maxdatetime FROM %s" % self.get_measurement_table()
        )
        c = self.db.cursor()
        c.execute(sql)
        item = c.fetchone()
        if item["maxdatetime"] is not None:
            latest = datetime.strptime(
                str(item["maxdatetime"]), self.get_timestamp_format()
            )
            return datetime.utcnow() - latest

    def get_stations_table(self):
        return "stations_%s" % self.resolution

    def get_measurement_table(self):
        return "measures_%s" % self.resolution

    def get_timestamp_format(self):
        knowledge = DwdCdcKnowledge.climate.get_resolution_by_name(self.resolution)
        return knowledge.__timestamp_format__

    def query(self, station_id, timestamp, recursion=0):
        """
        Get values from cache.
        station_id: Numeric station ID
        timestamp: datetime object
        """
        if recursion < 2:
            sql = (
                "SELECT * FROM %s WHERE station_id=? AND datetime LIKE ?"
                % self.get_measurement_table()
            )
            c = self.db.cursor()
            c.execute(
                sql, (station_id, timestamp.strftime(self.get_timestamp_format()))
            )
            out = c.fetchone()
            if out is None:
                # cache miss
                age = (datetime.utcnow() - timestamp).total_seconds() / 86400
                if age < 1:
                    self.import_measures(station_id, current=True, latest=False, historic=False)
                elif age < 360:
                    self.import_measures(station_id, latest=True, historic=False)
                elif age >= 360 and age <= 370:
                    self.import_measures(station_id, latest=True, historic=True)
                else:
                    self.import_measures(station_id, current=False, latest=False, historic=True)
                return self.query(station_id, timestamp, recursion=(recursion + 1))
            c.close()
            return out

    def haversine_distance(self, origin, destination):
        lon1, lat1 = origin
        lon2, lat2 = destination
        radius = 6371000  # meters

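        # Haversine formula: a = sin^2(dlat/2) + cos(lat1)*cos(lat2)*sin^2(dlon/2),
        # c = 2*atan2(sqrt(a), sqrt(1-a)), distance = radius * c
        # (great-circle distance on a sphere of Earth's mean radius).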
        dlat = math.radians(lat2 - lat1)
        dlon = math.radians(lon2 - lon1)
        a = math.sin(dlat / 2) * math.sin(dlat / 2) + math.cos(
            math.radians(lat1)
        ) * math.cos(math.radians(lat2)) * math.sin(dlon / 2) * math.sin(dlon / 2)
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        d = radius * c
        return d

    def stations(self, historic=False):
        """
        Return list of dicts with all stations.
        """
        out = []
        table = self.get_stations_table()
        sql = """
            SELECT s2.*
            FROM {table} s1
            LEFT JOIN {table} s2 ON (s1.station_id=s2.station_id AND s1.date_end=s1.date_end)
            GROUP BY s1.station_id""".format(
            table=table
        )
        c = self.db.cursor()
        for row in c.execute(sql):
            out.append(row)
        c.close()
        if len(out) == 0:
            # cache miss - have to import stations.
            self.import_stations()
            out = self.stations()
        return out

    def station_info(self, station_id):
        table = self.get_stations_table()
        sql = "SELECT * FROM {table} WHERE station_id=?".format(table=table)
        c = self.db.cursor()
        c.execute(sql, (station_id,))
        return c.fetchone()

    def nearest_station(self, lon, lat, surrounding=False):
        """
        Find the station closest to the given coordinates,
        using the most current station datasets.

        Parameters:
        ----------

            lon : float

            lat : float

            surrounding : float
                Adds a buffer zone in meters on top of the closest
                distance and returns a list of all stations inside
                this zone (instead of just one station).

        Example:
        --------

        >>> from dwdweather import DwdWeather
        >>> dwd = DwdWeather(resolution="hourly")
        >>> dwd.nearest_station(lon=7.0, lat=51.0, surrounding=10000)

        """

        closest = None
        closest_distance = float("inf")
        for station in self.stations():
            d = self.haversine_distance(
                (lon, lat), (station["geo_lon"], station["geo_lat"])
            )
            if d < closest_distance:
                closest = station
                closest_distance = d

        if surrounding:
            within_zone = []
            closest_distance = closest_distance + surrounding
            for station in self.stations():
                d = self.haversine_distance(
                    (lon, lat), (station["geo_lon"], station["geo_lat"])
                )
                if d < closest_distance:
                    within_zone.append(station)
            closest = within_zone
        return closest

    def stations_geojson(self):
        out = {"type": "FeatureCollection", "features": []}
        for station in self.stations():
            out["features"].append(
                {
                    "type": "Feature",
                    "properties": {
                        "id": station["station_id"],
                        "name": station["name"],
                    },
                    "geometry": {
                        "type": "Point",
                        "coordinates": [station["geo_lon"], station["geo_lat"]],
                    },
                }
            )
        return json.dumps(out)

    def stations_csv(self, delimiter=","):
        """
        Return stations list as CSV.
        """
        csvfile = StringIO()
        # assemble field list
        headers = [
            "station_id",
            "date_start",
            "date_end",
            "geo_lon",
            "geo_lat",
            "height",
            "name",
        ]
        writer = csv.writer(csvfile, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL)
        writer.writerow(headers)
        stations = self.stations()
        for station in stations:
            row = []
            for n in range(len(headers)):
                val = station[headers[n]]
                if val is None:
                    val = ""
                elif isinstance(val, int):
                    val = str(val)
                elif isinstance(val, float):
                    val = "%.4f" % val
                row.append(val)
            writer.writerow(row)
        contents = csvfile.getvalue()
        csvfile.close()
        return contents
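
A small export sketch for this variant (hypothetical; file names and coordinates are illustrative only):

    dwd = DwdWeather(resolution="hourly")

    # Dump the station list in both formats implemented above.
    with open("stations.csv", "w") as f:
        f.write(dwd.stations_csv())
    with open("stations.geojson", "w") as f:
        f.write(dwd.stations_geojson())

    # All stations within 10 km of the closest match to the given point.
    nearby = dwd.nearest_station(lon=7.0, lat=51.0, surrounding=10000)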