def parse_stationNumber(self, record, data, value):
    """Store the station's WMO and DWD identifiers on ``record``.

    When ``data['stationNumber']`` is truthy, the WMO ID is built from
    ``data['blockNumber']`` (zero-padded to two digits) followed by
    ``data['stationNumber']`` (zero-padded to three digits).  Otherwise
    ``data['shortStationName']`` is used as the WMO ID.

    Args:
        record: Mutable mapping that receives the ``wmo_station_id`` and
            ``dwd_station_id`` keys.
        data: Mapping providing ``stationNumber`` and, depending on its
            value, ``blockNumber`` or ``shortStationName``.
        value: Not used here; presumably present because all field
            handlers share one signature (verify against the caller).
    """
    if data['stationNumber']:
        # WMO index numbers are five characters: a two-digit block number
        # plus a three-digit station number.  Without padding, block 1 /
        # station 1 would become "1001" instead of "01001".
        block_number = str(data['blockNumber']).zfill(2)
        wmo_id = f"{block_number}{data['stationNumber']:03d}"
    else:
        wmo_id = data['shortStationName']
    record['wmo_station_id'] = wmo_id
    record['dwd_station_id'] = wmo_id_to_dwd(wmo_id)
def parse_station(self, station_sel, timestamps, source):
    """Build one forecast record per timestamp for a single station.

    Args:
        station_sel: Selector for one station node; it must support
            ``.css(query).extract_first()`` (presumably a Scrapy/parsel
            selector -- confirm against the caller).
        timestamps: Sequence of forecast timestamps; every element listed
            in ``self.ELEMENTS`` must provide exactly this many values.
        source: Value stored under the ``source`` key of every record.

    Returns:
        An iterable of dicts.  Each dict holds the station metadata plus
        ``timestamp`` and one key per column of ``self.ELEMENTS``.  An
        empty list is returned when the station has no coordinates.

    Raises:
        AssertionError: If an element's value count differs from
            ``len(timestamps)``.
    """
    import logging

    wmo_station_id = station_sel.css('name::text').extract_first()
    dwd_station_id = wmo_id_to_dwd(wmo_station_id)
    station_name = station_sel.css('description::text').extract_first()
    coordinates = station_sel.css('coordinates::text').extract_first()
    if coordinates is None:
        # Skip the station instead of crashing on None.split(...).
        logging.getLogger(__name__).warning(
            "Ignoring station without coordinates, WMO ID '%s', DWD ID "
            "'%s', name '%s'",
            wmo_station_id, dwd_station_id, station_name)
        return []
    lon, lat, height = coordinates.split(',')
    records = {'timestamp': timestamps}
    for element, column in self.ELEMENTS.items():
        values_str = station_sel.css(
            f'Forecast[elementName="{element}"] value::text'
        ).extract_first()
        # Values are whitespace-separated; '-' marks a missing value.
        records[column] = [
            None if row[0] == '-' else float(row[0])
            for row in csv.reader(
                re.sub(r'\s+', '\n', values_str.strip()).splitlines())
        ]
        # Explicit check rather than ``assert``: asserts are stripped under
        # ``python -O`` and zip() below would then silently truncate.  The
        # exception type is kept so existing callers see no difference.
        if len(records[column]) != len(timestamps):
            raise AssertionError(
                f"Element '{element}' has {len(records[column])} values "
                f"but there are {len(timestamps)} timestamps")
    base_record = {
        'observation_type': 'forecast',
        'source': source,
        'lat': float(lat),
        'lon': float(lon),
        'height': float(height),
        'dwd_station_id': dwd_station_id,
        'wmo_station_id': wmo_station_id,
        'station_name': station_name,
    }
    # Turn dict of lists into list of dicts
    return (
        {**base_record, **dict(zip(records, row))}
        for row in zip(*records.values())
    )
def parse(self, lat=None, lon=None, height=None, station_name=None):
    """Yield one ``'current'`` observation record per row of ``self.path``.

    The file is read as semicolon-delimited CSV with a header line.  The
    first row after the header carries the WMO station ID in the
    ``self.DATE_COLUMN`` field (trailing underscores are stripped); the
    row after that holds German header titles and is skipped.  Every
    remaining row is passed to ``self.parse_row`` and merged with the
    station metadata.

    Args:
        lat, lon, height, station_name: Station metadata.  If any of the
            four is ``None``, all four are replaced by the result of
            ``self.load_location(wmo_station_id)``.

    Yields:
        dict: Station metadata merged with ``self.parse_row(row)``.

    Raises:
        ValueError: If the file contains no row after the header, so no
            station ID can be determined.
    """
    # newline='' leaves newline handling to the csv module, as its
    # documentation requires for file objects.
    with open(self.path, newline='') as f:
        reader = csv.DictReader(f, delimiter=';')
        # A bare next() here would leak StopIteration out of this
        # generator, which Python turns into an opaque RuntimeError
        # (PEP 479).
        station_row = next(reader, None)
        if station_row is None:
            raise ValueError(
                f"No station ID row found in '{self.path}'")
        wmo_station_id = station_row[self.DATE_COLUMN].rstrip('_')
        dwd_station_id = wmo_id_to_dwd(wmo_station_id)
        if any(x is None for x in (lat, lon, height, station_name)):
            lat, lon, height, station_name = self.load_location(
                wmo_station_id)
        # The station metadata is identical for every row of the file.
        base_record = {
            'observation_type': 'current',
            'lat': lat,
            'lon': lon,
            'height': height,
            'dwd_station_id': dwd_station_id,
            'wmo_station_id': wmo_station_id,
            'station_name': station_name,
        }
        # Skip row with German header titles
        next(reader, None)
        for row in reader:
            yield {**base_record, **self.parse_row(row)}
def parse_station(self, station_sel, timestamps, source):
    """Build one forecast record per timestamp for a single station.

    Args:
        station_sel: Selector for one station node; it must support
            ``.css(query).extract_first()`` (presumably a Scrapy/parsel
            selector -- confirm against the caller).
        timestamps: Sequence of forecast timestamps; every element listed
            in ``self.ELEMENTS`` must provide exactly this many values.
        source: Value stored under the ``source`` key of every record.

    Returns:
        An iterable of dicts.  Each dict holds the station metadata plus
        ``timestamp`` and one key per column of ``self.ELEMENTS``.  An
        empty list is returned (and a warning logged) when the station
        has no coordinates.

    Raises:
        AssertionError: If an element's value count differs from
            ``len(timestamps)``.
    """
    wmo_station_id = station_sel.css('name::text').extract_first()
    dwd_station_id = wmo_id_to_dwd(wmo_station_id)
    station_name = station_sel.css('description::text').extract_first()
    coordinates = station_sel.css('coordinates::text').extract_first()
    # Test for the missing node directly instead of catching
    # AttributeError around the whole expression, which could also hide
    # unrelated attribute errors.
    if coordinates is None:
        self.logger.warning(
            "Ignoring station without coordinates, WMO ID '%s', DWD ID "
            "'%s', name '%s'",
            wmo_station_id, dwd_station_id, station_name)
        return []
    lon, lat, height = coordinates.split(',')
    records = {'timestamp': timestamps}
    for element, column in self.ELEMENTS.items():
        values_str = station_sel.css(
            f'Forecast[elementName="{element}"] value::text'
        ).extract_first()
        # A column-specific ``parse_<column>`` method, when defined,
        # replaces the default float conversion.
        converter = getattr(self, f'parse_{column}', float)
        # Values are whitespace-separated; '-' marks a missing value.
        records[column] = [
            None if row[0] == '-' else converter(row[0])
            for row in csv.reader(
                re.sub(r'\s+', '\n', values_str.strip()).splitlines())
        ]
        # Explicit check rather than ``assert``: asserts are stripped under
        # ``python -O`` and zip() below would then silently truncate.  The
        # exception type is kept so existing callers see no difference.
        if len(records[column]) != len(timestamps):
            raise AssertionError(
                f"Element '{element}' has {len(records[column])} values "
                f"but there are {len(timestamps)} timestamps")
    base_record = {
        'observation_type': 'forecast',
        'source': source,
        'lat': float(lat),
        'lon': float(lon),
        'height': float(height),
        'dwd_station_id': dwd_station_id,
        'wmo_station_id': wmo_station_id,
        'station_name': station_name,
    }
    # Turn dict of lists into list of dicts
    return (
        {**base_record, **dict(zip(records, row))}
        for row in zip(*records.values())
    )