def generate_counter_samples(
        counters: Counters,
        measurement: str,
        # NOTE(review): annotation fixed from Sequence[str] — the tag
        # comprehension below zips each key_labels element against a
        # tuple-valued keyset entry, so each element must itself be a
        # sequence of label strings (e.g. ("state", "district")).
        key_labels: typing.Sequence[typing.Sequence[str]],
        field_labels: typing.Sequence[str],
):
    """Yield one InfluxDB sample per day and per non-zero key combination.

    ``counters.data`` is reshaped to (ndays, nkeysets, nfields), where the
    key combinations are the cartesian product of ``counters.keys``.  Days
    are counted forward from ``counters.first_date``.

    :param counters: Counter cube with ``data``, ``keys`` and ``first_date``.
    :param measurement: InfluxDB measurement name for every sample.
    :param key_labels: One label sequence per key dimension; paired with
        the corresponding keyset values to build the sample tags.
    :param field_labels: One field name per value column.
    """
    assert len(key_labels) == len(counters.keys)
    assert len(field_labels) == counters.data.shape[-1]
    data = counters.data
    keysets = list(itertools.product(*counters.keys))
    # Flatten all key dimensions into a single axis so each (day, keyset)
    # pair addresses one row of field values.
    reshaped = counters.data.reshape(
        (data.shape[0], len(keysets), data.shape[-1]),
    )
    for i in range(counters.data.shape[0]):
        date = counters.first_date + timedelta(days=i)
        timestamp = datetime(date.year, date.month, date.day)
        for j, keyset in enumerate(keysets):
            row = tuple(reshaped[i, j])
            if not any(row):
                # skip this sample: we have a lot of those because we have a
                # non-3.NF database: state and district are in the same column
                # and we get the product of all states and districts -> many
                # rows which never have non-zero values.
                continue
            # Build tags only for rows we actually emit (the all-zero rows
            # above are the common case, so this saves work).
            tags = tuple(
                (k, v)
                for ks, vs in zip(key_labels, keyset)
                for k, v in zip(ks, vs)
            )
            yield influxdb.InfluxDBSample(
                measurement=measurement,
                tags=tags,
                # Reuse the materialized row instead of re-indexing the array.
                fields=tuple(zip(field_labels, row)),
                timestamp=timestamp,
                ns_part=0,
            )
def generate_population_samples(population_info, measurement: str,
                                first_date: datetime, ndays: int):
    """Yield a per-country population sample for each day in the range.

    :param population_info: Mapping of country name -> population count.
    :param measurement: InfluxDB measurement name for every sample.
    :param first_date: Date of the first emitted day.
    :param ndays: Number of additional days after ``first_date``
        (``ndays + 1`` days are emitted in total).
    """
    # One timestamp-less prototype per country; the daily loop below only
    # stamps copies of these, so the field/tag tuples are built once.
    prototypes = [
        influxdb.InfluxDBSample(
            measurement=measurement,
            tags=(("country", country), ),
            fields=(("population", population), ),
            timestamp=None,
            ns_part=0,
        )
        for country, population in population_info.items()
    ]
    for offset in range(ndays + 1):
        day = first_date + timedelta(days=offset)
        # Normalize to midnight of that day.
        stamp = datetime(day.year, day.month, day.day)
        for prototype in prototypes:
            yield prototype._replace(timestamp=stamp)
def read(f, stationmap):
    """Parse a DWD weather CSV (``;``-separated) and yield InfluxDB samples.

    Columns are located by header name, so column order does not matter;
    only columns present in ``FIELDMAP`` are converted to fields.  Rows
    older than ``DATE_CUTOFF`` and rows with no usable values are skipped.

    :param f: Open file object positioned at the CSV header line.
    :param stationmap: Mapping of integer station id -> state name.
    :raises ValueError: If a data row lacks the trailing "eor" marker.
    """
    reader = csv.reader(f, delimiter=";")
    header = next(reader)
    header_index = {k.strip(): i for i, k in enumerate(header)}
    date_index = header_index["MESS_DATUM"]
    station_id_index = header_index["STATIONS_ID"]
    # Only keep the columns we know how to map to field names.
    fieldmap = {
        header_index[key]: field
        for key, field in FIELDMAP.items()
        if key in header_index
    }
    for row in reader:
        # Parser sanity check: DWD data rows end with an "eor"
        # (end-of-record) marker.  Raise instead of assert so the check
        # is not stripped under `python -O`.
        if row[-1] != "eor":
            raise ValueError(f"malformed DWD row (missing 'eor'): {row!r}")
        timestamp = datetime.strptime(row[date_index].strip(), "%Y%m%d")
        if timestamp < DATE_CUTOFF:
            continue
        station_id = int(row[station_id_index].strip())
        tags = (
            ("station_id", str(station_id)),
            ("state", stationmap[station_id]),
        )
        fields = []
        for index, field in fieldmap.items():
            value_s = row[index]
            value = float(value_s.strip())
            # -999 appears to be the DWD "missing value" sentinel — TODO
            # confirm against the dataset's format description.
            if value == -999:
                continue
            fields.append((field, value))
        if not fields:
            # Nothing but missing values on this row.
            continue
        yield influxdb.InfluxDBSample(
            measurement="dwd_weather",
            timestamp=timestamp,
            ns_part=0,
            fields=tuple(fields),
            tags=tags,
        )
def import_samples(f):
    """Yield one weekly test-statistics sample per CSV row.

    Each row carries a calendar week plus test counts; the sample is
    timestamped at midnight of that week's Monday.  A progress line is
    printed to the terminal for every row.

    :param f: Open file object with a CSV header row.
    """
    for record in csv.DictReader(f):
        # Older files may lack the "Jahr" column; those are 2020 data.
        year = int(record.get("Jahr", 2020))
        week = int(record["Kalenderwoche"])
        ntests = int(record["AnzahlTestungen"])
        npositive = int(record["TestsPositiv"])
        nsites = int(record["AnzahlLabore"])
        day = monday_of_calenderweek(year, week)
        yield influxdb.InfluxDBSample(
            measurement=MEASUREMENT,
            timestamp=datetime(day.year, day.month, day.day),
            ns_part=0,
            tags=(),
            fields=(
                ("tests", ntests),
                ("positives", npositive),
                ("sites", nsites),
            ),
        )
        # \x1b[J clears to end of screen; \r keeps progress on one line.
        print(f"\x1b[J{day}", end="\r")
def generate_events(events):
    """Yield one annotation sample per event dict.

    Each event requires "date" (%Y-%m-%d) and "title"; "text", "state",
    "district", "is_spreader", "is_policy" and "spreader_class" are
    optional.  Location info is appended to the text as a <sup> suffix.

    :param events: Iterable of event dicts.
    """
    for ev in events:
        timestamp = datetime.strptime(ev["date"], "%Y-%m-%d")
        fields = [("title", ev["title"])]
        # Optional free-form text comes first; location suffix is appended.
        text_parts = [ev["text"]] if "text" in ev else []
        state = ev.get("state")
        district = ev.get("district")
        location = [part for part in (district, state) if part is not None]
        if location:
            text_parts.append(f"<sup>({', '.join(location)})</sup>")
        is_spreader = bool(ev.get("is_spreader", False))
        is_policy = bool(ev.get("is_policy", False))
        tags = [
            ("is_spreader", INFLUX_BOOL_NAMES[is_spreader]),
            ("is_policy", INFLUX_BOOL_NAMES[is_policy]),
        ]
        if state is not None:
            tags.append(("state", state))
        if district is not None:
            tags.append(("district", district))
        if is_spreader:
            # Spreader events must carry a classification.
            fields.append(("spreader_class", ev["spreader_class"]))
        fields.append(("text", "\n".join(text_parts)))
        yield influxdb.InfluxDBSample(
            timestamp=timestamp,
            ns_part=0,
            tags=tuple(tags),
            fields=tuple(fields),
            measurement=MEASUREMENT,
        )
def generate_samples(f):
    """Yield one adult-ICU capacity sample per row of the DIVI CSV.

    :param f: Open file object with a CSV header row (German column names).
    """
    for record in csv.DictReader(f):
        state = translate_state(record["Bundesland"])
        reporting = int(record["Anzahl_Meldebereiche_Erwachsene"])
        inuse = int(record["Belegte_Intensivbetten_Erwachsene"])
        free = int(record["Freie_Intensivbetten_Erwachsene"])
        inuse_covid = int(record["Aktuelle_COVID_Faelle_Erwachsene_ITS"])
        emergency_reserve = int(record["7_Tage_Notfallreserve_Erwachsene"])
        yield influxdb.InfluxDBSample(
            timestamp=parse_date(record["Datum"]),
            ns_part=0,
            tags=(("state", state), ),
            fields=(
                ("reporting", reporting),
                ("inuse", inuse),
                ("inuse_covid", inuse_covid),
                ("emergency_reserve", emergency_reserve),
                ("free", free),
            ),
            measurement=MEASUREMENT,
        )