def retry_download_unit_biomass():
    """Retry the download of previously failed Biomasseeinheit (unit-biomass).

    Reads the ``EinheitMastrNummer`` column of the failure list stored in
    ``fname_biomass_fail_u`` (``;``-delimited CSV), drops duplicates while
    keeping the first-seen order and requests every unit once more with
    ``get_power_unit_biomass``.

    A unit that is returned is handed to ``write_to_csv`` for
    ``fname_biomass_unit``.  A unit that fails again is logged and recorded
    in ``fname_biomass_fail_u`` together with a timestamp and the comment
    ``'Third fail'``.  If the failure list does not exist, only an info
    message is logged.

    Returns
    -------
    None
        All results are written to CSV files.
    """
    # Test the failure file itself.  The former check wrapped the path in
    # os.path.dirname(), i.e. it asked whether the parent *directory* is a
    # regular file, so the retry branch was never entered.
    if not os.path.isfile(fname_biomass_fail_u):
        log.info('No failed downloads for Biomasseeinheit')
        return

    failed = pd.read_csv(fname_biomass_fail_u, delimiter=';')
    # dict.fromkeys() removes duplicates but keeps the first-seen order.
    mastr_numbers = list(
        dict.fromkeys(failed['EinheitMastrNummer'].values.tolist()))
    log.info(f'Retry download {len(mastr_numbers)} failed Biomasseeinheit')

    for mastr_number in mastr_numbers:
        unit_biomass = get_power_unit_biomass(mastr_number)  # Third download
        if unit_biomass is not None:
            write_to_csv(fname_biomass_unit, unit_biomass)
            continue

        # exc_info=False: there is no active exception here, the getter
        # signals failure by returning None.
        log.exception(
            f'Third download failed unit_biomass: {mastr_number}',
            exc_info=False)
        unit_fail_third = pd.DataFrame({'EinheitMastrNummer': [mastr_number]})
        unit_fail_third['timestamp'] = str(datetime.datetime.now())
        unit_fail_third['comment'] = 'Third fail'
        write_to_csv(fname_biomass_fail_u, unit_fail_third)
def download_unit_nuclear():
    """Download every Kernenergieeinheit (nuclear unit) and write it to CSV.

    Runs ``setup_power_unit_nuclear``, reads ``fname_power_unit_nuclear`` and
    takes the distinct ``EinheitMastrNummer`` values in first-seen order.
    Each unit is requested with ``get_power_unit_nuclear``; if the first
    request returns ``None`` it is requested a second time.  Every returned
    unit is handed to ``write_to_csv`` for ``fname_nuclear_unit``; failed
    attempts are only logged.
    """
    setup_power_unit_nuclear()
    power_units = read_power_unit_nuclear(fname_power_unit_nuclear)
    # Order-preserving de-duplication of the unit numbers.
    mastr_numbers = list(
        dict.fromkeys(power_units['EinheitMastrNummer'].values.tolist()))
    log.info(f'Download {len(mastr_numbers)} Kernenergieeinheit')

    for position, mastr_number in enumerate(mastr_numbers):
        # At most two attempts per unit; stop at the first one that succeeds.
        for attempt in ('First', 'Second'):
            unit = get_power_unit_nuclear(mastr_number)
            if unit is not None:
                write_to_csv(fname_nuclear_unit, unit)
                break
            log.exception(
                f'{attempt} download failed unit_nuclear ({position}): {mastr_number}',
                exc_info=False)
def retry_download_unit_gsgk_eeg():
    """Retry the download of previously failed unit-gsgk-eeg entries.

    unit-gsgk-eeg is GeoSolarthermieGrubenKlaerschlammeinheit-EEG.  Reads the
    ``EegMastrNummer`` column of the failure list stored in
    ``fname_gsgk_fail_e`` (``;``-delimited CSV), drops duplicates while
    keeping the first-seen order and requests every entry once more with
    ``get_unit_gsgk_eeg``.

    An entry that is returned is handed to ``write_to_csv`` for
    ``fname_gsgk_eeg``.  An entry that fails again is logged and recorded in
    ``fname_gsgk_fail_e`` together with a timestamp and the comment
    ``'Third fail'``.  If the failure list does not exist, only an info
    message is logged.

    Returns
    -------
    None
        All results are written to CSV files.
    """
    # Test the failure file itself.  The former check wrapped the path in
    # os.path.dirname(), i.e. it asked whether the parent *directory* is a
    # regular file, so the retry branch was never entered.
    if not os.path.isfile(fname_gsgk_fail_e):
        log.info('No failed downloads for GeoSolarthermieGrubenKlaerschlammeinheit-EEG')
        return

    failed = pd.read_csv(fname_gsgk_fail_e, delimiter=';')
    # dict.fromkeys() removes duplicates but keeps the first-seen order.
    eeg_numbers = list(dict.fromkeys(failed['EegMastrNummer'].values.tolist()))
    log.info(
        f'Retry download {len(eeg_numbers)} failed GeoSolarthermieGrubenKlaerschlammeinheit-EEG')

    for eeg_number in eeg_numbers:
        unit_gsgk_eeg = get_unit_gsgk_eeg(eeg_number)  # Third download
        if unit_gsgk_eeg is not None:
            write_to_csv(fname_gsgk_eeg, unit_gsgk_eeg)
            continue

        # exc_info=False: there is no active exception here, the getter
        # signals failure by returning None.
        log.exception(
            f'Third download failed unit_gsgk_eeg: {eeg_number}',
            exc_info=False)
        unit_fail_third = pd.DataFrame({'EegMastrNummer': [eeg_number]})
        unit_fail_third['timestamp'] = str(datetime.datetime.now())
        unit_fail_third['comment'] = 'Third fail'
        write_to_csv(fname_gsgk_fail_e, unit_fail_third)
def process_partionier(units, eeg=False):
    """Download all wind units of one partition via ``get_power_unit_wind``.

    Parameters
    ----------
    units : pandas.DataFrame
        Partition of power units.  Must provide the column
        ``EinheitMastrNummer`` and, when ``eeg`` is set, ``EegMastrNummer``.
    eeg : bool
        Whether the EEG number of each unit is passed on so that EEG data is
        retrieved as well.  When false, the unit number is passed in its
        place.

    Notes
    -----
    For every unit the first element of the result is handed to
    ``write_to_csv`` for ``fname_wind``; the second element is written to
    ``fname_wind_eeg`` unless it is empty.  A failing unit is logged with its
    traceback and does not stop the remaining downloads.
    """
    wind_list = units['EinheitMastrNummer'].values.tolist()
    wind_list_eeg = units['EegMastrNummer'].values.tolist() if eeg else wind_list

    # Start at position 0: the previous range(1, ...) silently skipped the
    # first unit of every partition.
    for i, (mastr_unit, mastr_eeg) in enumerate(zip(wind_list, wind_list_eeg)):
        try:
            unit_wind = get_power_unit_wind(mastr_unit_wind=mastr_unit,
                                            mastr_unit_eeg=mastr_eeg,
                                            eeg=eeg)
            write_to_csv(fname_wind, unit_wind[0])
            if len(unit_wind[1]) != 0:
                write_to_csv(fname_wind_eeg, unit_wind[1])
        except Exception:
            # "except Exception" instead of a bare except, so that
            # KeyboardInterrupt/SystemExit can still stop a long download.
            log.exception(f'Download failed unit_wind ({i}): {mastr_unit}')
# setup_storage_units(overwrite=True)
#
# Selects the rows whose Einheittyp equals 'Stromspeichereinheit' from the
# power-unit table read from fname_power_unit (after drop_duplicates()) and
# passes a non-empty selection to write_to_csv(fname_storage_unit, ...).
# When `overwrite` is true, an existing fname_storage file is removed first
# via remove_csv().
#
# Return values visible in the code: the filtered frame, an empty
# pd.DataFrame() after logging 'no storageunits found', or the result of
# read_power_units(name_storage) in the last else branch.
#
# NOTE(review): line breaks and indentation of this function are lost in this
# text, so it cannot be told from here which `if` each `else` belongs to.
# Restore the formatting from version control before changing the logic.
# NOTE(review): `name_storage` is not defined anywhere in the visible code;
# presumably `fname_storage` is meant - confirm.
# NOTE(review): `power_unit_storage.reset_index()` and `power_unit.iloc[0:0]`
# discard their results, and `data_version` is assigned but never read.
# NOTE(review): the file that is removed is fname_storage, while the file that
# is written is fname_storage_unit - confirm that this is intended.
def setup_storage_units(overwrite=True): data_version = get_data_version() if overwrite: if os.path.isfile(fname_storage): remove_csv(fname_storage) if os.path.isfile(fname_power_unit): power_unit = read_power_units(fname_power_unit) if not power_unit.empty: power_unit = power_unit.drop_duplicates() power_unit_storage = power_unit[power_unit.Einheittyp == 'Stromspeichereinheit'] power_unit_storage.index.names = ['see_id'] power_unit_storage.reset_index() power_unit_storage.index.names = ['id'] if not power_unit_storage.empty: write_to_csv(fname_storage_unit, power_unit_storage) else: log.info('No storage units in this dataset. Storage units can be found starting at index: approx. 1 220 000') power_unit.iloc[0:0] return power_unit_storage else: log.info('no storageunits found') return pd.DataFrame() else: power_unit_solar = read_power_units(name_storage) return power_unit_solar
def download_wind_permit(units, start_from=0, overwrite=False):
    """Download unit_wind_permit using the GetEinheitGenehmigung request.

    Parameters
    ----------
    units : pandas.DataFrame
        Table with a ``GenMastrNummer`` column.  Rows whose permit number is
        NA are skipped.
    start_from : int
        Position in ``units`` at which the download starts.
    overwrite : bool
        Not used by this function; kept so that existing callers keep
        working.

    Notes
    -----
    For every permit, each entry of ``VerknuepfteEinheiten`` (linked units)
    is expanded into a table of units, the permit attributes are attached as
    constant columns, ``MaStRNummer`` is renamed to ``EinheitMastrNummer``
    and the result is handed to ``write_to_csv`` for ``fname_wind_permit``.
    A failing permit is logged with its traceback and skipped.
    """
    permit_numbers = units['GenMastrNummer'].values.tolist()

    for i in range(start_from, len(permit_numbers)):
        permit_number = permit_numbers[i]
        if pd.isna(permit_number):
            continue
        try:
            unit_wind_permit = get_unit_wind_permit(permit_number)
            for _, linked_units in unit_wind_permit.VerknuepfteEinheiten.items():
                df_new = pd.DataFrame.from_dict(linked_units)
                # DataFrame.size counts cells (rows x columns), so each
                # constant column is at least as long as df_new;
                # reindex() below aligns it to df_new's index again.
                n_values = df_new.size
                df = pd.DataFrame({
                    'GenMastrNummer': n_values * [unit_wind_permit.GenMastrNummer.iloc[0]],
                    'Datum': n_values * [unit_wind_permit.Datum.iloc[0]],
                    'Art': n_values * [unit_wind_permit.Art.iloc[0]],
                    # Commas are removed from the authority name
                    # (presumably to keep the CSV columns intact - confirm).
                    'Behoerde': n_values * [
                        unit_wind_permit.Behoerde.iloc[0].translate({ord(','): None})],
                    'Aktenzeichen': n_values * [unit_wind_permit.Aktenzeichen.iloc[0]],
                    'Frist': n_values * [unit_wind_permit.Frist.iloc[0]['Wert']],
                    'WasserrechtsNummer': n_values * [
                        unit_wind_permit.WasserrechtsNummer.iloc[0]],
                    'WasserrechtAblaufdatum': n_values * [
                        unit_wind_permit.WasserrechtAblaufdatum.iloc[0]['Wert']],
                    'Meldedatum': n_values * [unit_wind_permit.Meldedatum.iloc[0]],
                })
                df_all = pd.concat([df_new, df.reindex(df_new.index)], axis=1)
                df_all = df_all.rename(
                    {'MaStRNummer': 'EinheitMastrNummer'}, axis=1)
                # Written per linked-units entry, so a table left over from
                # an earlier permit can never be written again.
                write_to_csv(fname_wind_permit, df_all)
        except Exception:
            # "except Exception" instead of a bare except, so that
            # KeyboardInterrupt/SystemExit can still stop a long download.
            log.exception(
                f'Download failed unit_wind_permit ({i}): {permit_number}')
def get_unit_storage(mastr_unit_storage):
    """Get one Stromspeichereinheit (storage unit) using GetEinheitStromSpeicher.

    The serialized response is turned into a one-row table (one column per
    response field), extended by the columns ``version`` and ``timestamp``
    and handed to ``write_to_csv`` for ``fname_storage``.

    Parameters
    ----------
    mastr_unit_storage : str
        ``EinheitMastrNummer`` of the unit to request.

    Returns
    -------
    pandas.DataFrame or None
        The one-row table, or ``None`` if the request, the conversion or the
        write failed.
    """
    data_version = get_data_version()
    try:
        response = client_bind.GetEinheitStromSpeicher(
            apiKey=api_key,
            marktakteurMastrNummer=my_mastr,
            einheitMastrNummer=mastr_unit_storage)
        fields = serialize_object(response)
        df = pd.DataFrame(list(fields.items()))
        # Field names become the column labels, the values the single row.
        unit_storage = df.set_index(list(df.columns.values)[0]).transpose()
        # A reset_index() call whose result was discarded (no effect on the
        # table) has been dropped here.
        unit_storage.index.names = ['lid']
        unit_storage['version'] = data_version
        unit_storage['timestamp'] = str(datetime.datetime.now())
        write_to_csv(fname_storage, unit_storage)
    except Exception:
        # Still best effort (callers get None), but the reason for the
        # failure is no longer swallowed silently.
        log.exception(f'Download failed unit_storage: {mastr_unit_storage}')
        return None
    return unit_storage
def setup_power_unit_gsgk():
    """Set up the file for power-unit_gsgk.

    power-unit_gsgk is
    Stromerzeugungseinheit-GeoSolarthermieGrubenKlaerschlamm.

    If ``fname_power_unit_gsgk`` already exists nothing is done.  Otherwise
    the Stromerzeugungseinheit (power-unit) table is read from
    ``fname_power_unit``, filtered for the Einheittyp
    ``'GeoSolarthermieGrubenKlaerschlamm'``, de-duplicated on the unit's
    identifying columns and handed to ``write_to_csv`` for
    ``fname_power_unit_gsgk``.  A missing ``fname_power_unit`` is logged as an
    error.

    Returns
    -------
    None
        The result is written to ``fname_power_unit_gsgk``.
    """
    if os.path.isfile(fname_power_unit_gsgk):
        log.info('Skip setup for Stromerzeugungseinheit-GeoSolarthermieGrubenKlaerschlamm')
        return
    if not os.path.isfile(fname_power_unit):
        # Logged at error level: the setup cannot proceed without this file.
        log.error(f'Error reading power-unit from {fname_power_unit}')
        return

    power_unit = read_power_units(fname_power_unit)
    power_unit_gsgk = power_unit[
        power_unit.Einheittyp == 'GeoSolarthermieGrubenKlaerschlamm']
    power_unit_gsgk = power_unit_gsgk.drop_duplicates(subset=[
        'EinheitMastrNummer', 'Name', 'Einheitart', 'Einheittyp', 'Standort',
        'Bruttoleistung', 'Erzeugungsleistung', 'EinheitBetriebsstatus',
        'Anlagenbetreiber', 'EegMastrNummer', 'KwkMastrNummer',
        'SpeMastrNummer', 'GenMastrNummer'])
    log.info('Filter power-unit for gsgk and remove duplicates')
    # A reset_index() call whose result was discarded (no effect on the
    # table) has been dropped here; only the index name is changed.
    power_unit_gsgk.index.name = 'pu-id'
    write_to_csv(fname_power_unit_gsgk, power_unit_gsgk)
    power_unit_gsgk_cnt = power_unit_gsgk['timestamp'].count()
    log.info(f'Write {power_unit_gsgk_cnt} power-unit_gsgk to {fname_power_unit_gsgk}')
def setup_power_unit_biomass():
    """Set up the file for Stromerzeugungseinheit-Biomasse (power-unit_biomass).

    If ``fname_power_unit_biomass`` already exists nothing is done.
    Otherwise the Stromerzeugungseinheit (power-unit) table is read from
    ``fname_power_unit``, filtered for the Einheittyp ``'Biomasse'``,
    de-duplicated on the unit's identifying columns and handed to
    ``write_to_csv`` for ``fname_power_unit_biomass``.  A missing
    ``fname_power_unit`` is logged as an error.

    Returns
    -------
    None
        The result is written to ``fname_power_unit_biomass``.
    """
    if os.path.isfile(fname_power_unit_biomass):
        log.info('Skip setup for Stromerzeugungseinheit-Biomasse')
        return
    if not os.path.isfile(fname_power_unit):
        # Logged at error level: the setup cannot proceed without this file.
        log.error(f'Error reading power-unit from {fname_power_unit}')
        return

    power_unit = read_power_units(fname_power_unit)
    power_unit_biomass = power_unit[power_unit.Einheittyp == 'Biomasse']
    power_unit_biomass = power_unit_biomass.drop_duplicates(subset=[
        'EinheitMastrNummer', 'Name', 'Einheitart', 'Einheittyp', 'Standort',
        'Bruttoleistung', 'Erzeugungsleistung', 'EinheitBetriebsstatus',
        'Anlagenbetreiber', 'EegMastrNummer', 'KwkMastrNummer',
        'SpeMastrNummer', 'GenMastrNummer'])
    log.info('Filter power-unit for biomass and remove duplicates')
    # A reset_index() call whose result was discarded (no effect on the
    # table) has been dropped here; only the index name is changed.
    power_unit_biomass.index.name = 'pu-id'
    write_to_csv(fname_power_unit_biomass, power_unit_biomass)
    power_unit_biomass_cnt = power_unit_biomass['timestamp'].count()
    log.info(f'Write {power_unit_biomass_cnt} power-unit_biomass to {fname_power_unit_biomass}')
def download_unit_solar():
    """Download Solareinheit (solar units) and write them to CSV.

    The unit numbers are taken from the ``EinheitMastrNummer`` column of the
    table returned by ``setup_power_unit_solar(eeg=False)``.  Each unit is
    requested with ``get_power_unit_solar`` and handed to ``write_to_csv``
    for ``fname_solar_unit``.  A failing unit is logged with its traceback
    and does not stop the remaining downloads.

    Existing units: 31543 (2019-02-10)
    """
    log.info('download unit solar..')
    # Separate name for the source table, so the per-unit result below does
    # not overwrite it.
    power_unit_solar = setup_power_unit_solar(eeg=False)
    unit_solar_list = power_unit_solar['EinheitMastrNummer'].values.tolist()
    log.info('Download MaStR Solar')
    log.info(f'Number of unit_solar: {len(unit_solar_list)}')

    for i, mastr_number in enumerate(unit_solar_list):
        try:
            unit_solar = get_power_unit_solar(mastr_number)
            write_to_csv(fname_solar_unit, unit_solar)
        except Exception:
            # "except Exception" instead of a bare except, so that
            # KeyboardInterrupt/SystemExit can still stop a long download.
            log.exception(
                f'Download failed unit_solar ({i}): {mastr_number}')
def parallel_download(unit_list, func, filename, threads=4, timeout=10, time_blacklist=True):
    """Download a list of units in parallel with a ``multiprocessing.Pool``.

    ``func`` is mapped over ``unit_list`` with ``imap_unordered``; results
    are consumed in completion order behind a ``tqdm`` progress bar.

    Arguments
    ---------
    unit_list : sized iterable
        'EinheitMastrNummer' of the units to download (``len()`` is used for
        the progress bar total).
    func : callable
        Downloads one unit of the list, i.e. get_power_unit_xxx().  A return
        value of ``None`` is treated as "nothing retrieved".
    filename : str
        CSV file the retrieved units are written to.
    threads : int
        Number of pool workers.
    timeout : int
        Passed to ``_stop_execution``; per the original documentation the
        number of minutes to keep retrying after the last successful query.
    time_blacklist : bool
        Passed to ``_stop_execution``; per the original documentation, exit
        as soon as the current time is blacklisted.
    """
    _reset_timeout()
    with multiprocessing.Pool(threads) as workers:
        results = workers.imap_unordered(func, unit_list)
        progress = tqdm.tqdm(results, total=len(unit_list))
        for downloaded in progress:
            # Only a successful retrieval resets the timeout and is stored.
            if downloaded is not None:
                _reset_timeout()
                write_to_csv(filename, downloaded)
            # The stop condition is checked after every result.
            if _stop_execution(time_blacklist, timeout) is True:
                break
def download_unit_solar_eeg():
    """Download unit_solar_eeg using the GetAnlageEegSolar request.

    The EEG numbers are taken from the ``EegMastrNummer`` column of the table
    returned by ``setup_power_unit_solar()``.  Each entry is requested with
    ``get_unit_solar_eeg`` and handed to ``write_to_csv`` for
    ``fname_solar_eeg``.  A failing entry is logged with its traceback and
    does not stop the remaining downloads.

    The function takes no parameters.
    """
    # The return value is not used here; the call is kept because its side
    # effects, if any, are not visible from this function.
    get_data_version()
    power_unit_solar = setup_power_unit_solar()
    eeg_numbers = power_unit_solar['EegMastrNummer'].values.tolist()

    for i, eeg_number in enumerate(eeg_numbers):
        try:
            unit_solar_eeg = get_unit_solar_eeg(eeg_number)
            write_to_csv(fname_solar_eeg, unit_solar_eeg)
        except Exception:
            # "except Exception" instead of a bare except, so that
            # KeyboardInterrupt/SystemExit can still stop a long download.
            log.exception(
                f'Download failed unit_solar_eeg ({i}): {eeg_number}')
def download_unit_storage_eeg():
    """Download Stromspeichereinheit-EEG (unit-storage-eeg) entries.

    Uses the GetAnlageEegStromspeicher request.

    Runs ``setup_power_unit_storage``, reads ``fname_power_unit_storage`` and
    takes the distinct ``EegMastrNummer`` values (first-seen order, entries
    whose string form is ``'nan'`` removed).  Each entry is requested with
    ``get_unit_storage_eeg`` and, if that returns ``None``, requested once
    more.  After the loop ``retry_download_unit_storage_eeg`` is called.

    Returns
    -------
    None
        Downloads are written to ``fname_storage_eeg``; entries that failed
        twice are recorded in ``fname_storage_fail_e`` with a timestamp and
        the comment ``'Second fail'``.
    """
    setup_power_unit_storage()
    storage_power_units = read_power_unit_storage(fname_power_unit_storage)
    raw_numbers = storage_power_units['EegMastrNummer'].values.tolist()
    # De-duplicate in first-seen order, then drop the NaN placeholders.
    eeg_numbers = [nr for nr in dict.fromkeys(raw_numbers) if str(nr) != 'nan']
    log.info(f'Download {len(eeg_numbers)} Stromspeichereinheit-EEG')

    for position, eeg_number in enumerate(eeg_numbers):
        unit_eeg = get_unit_storage_eeg(eeg_number)  # first attempt
        if unit_eeg is not None:
            write_to_csv(fname_storage_eeg, unit_eeg)
            continue
        log.exception(
            f'First download failed unit_storage_eeg ({position}): {eeg_number}',
            exc_info=False)

        unit_eeg = get_unit_storage_eeg(eeg_number)  # second attempt
        if unit_eeg is not None:
            write_to_csv(fname_storage_eeg, unit_eeg)
            continue
        log.exception(
            f'Second download failed unit_storage_eeg ({position}): {eeg_number}',
            exc_info=False)

        # Remember the entry for the retry run.
        failure = pd.DataFrame({'EegMastrNummer': [eeg_number]})
        failure['timestamp'] = str(datetime.datetime.now())
        failure['comment'] = 'Second fail'
        write_to_csv(fname_storage_fail_e, failure)

    retry_download_unit_storage_eeg()
def download_unit_hydro():
    """Download Wassereinheit (unit-hydro) entries and write them to CSV.

    Runs ``setup_power_unit_hydro``, reads ``fname_power_unit_hydro`` and
    takes the distinct ``EinheitMastrNummer`` values in first-seen order.
    Each unit is requested with ``get_power_unit_hydro`` and, if that returns
    ``None``, requested once more.  After the loop
    ``retry_download_unit_hydro`` is called.

    Existing units: 31543 (2019-02-10)

    Returns
    -------
    None
        Downloads are written to ``fname_hydro_unit``; units that failed
        twice are recorded in ``fname_hydro_fail_u`` with a timestamp and the
        comment ``'Second fail'``.
    """
    setup_power_unit_hydro()
    hydro_power_units = read_power_unit_hydro(fname_power_unit_hydro)
    # Order-preserving de-duplication of the unit numbers.
    unit_numbers = list(
        dict.fromkeys(hydro_power_units['EinheitMastrNummer'].values.tolist()))
    log.info(f'Download {len(unit_numbers)} Wassereinheit')

    for position, unit_number in enumerate(unit_numbers):
        unit = get_power_unit_hydro(unit_number)  # first attempt
        if unit is None:
            log.exception(
                f'First download failed unit_hydro ({position}): {unit_number}',
                exc_info=False)
            unit = get_power_unit_hydro(unit_number)  # second attempt
            if unit is None:
                log.exception(
                    f'Second download failed unit_hydro ({position}): {unit_number}',
                    exc_info=False)
                # Remember the unit for the retry run.
                failure = pd.DataFrame({'EinheitMastrNummer': [unit_number]})
                failure['timestamp'] = str(datetime.datetime.now())
                failure['comment'] = 'Second fail'
                write_to_csv(fname_hydro_fail_u, failure)
                continue
        write_to_csv(fname_hydro_unit, unit)

    retry_download_unit_hydro()
def download_unit_storage():
    """Download Stromspeichereinheit (storage units) and write them to CSV.

    Runs ``setup_power_unit_storage``, reads ``fname_power_unit_storage`` and
    takes the distinct ``EinheitMastrNummer`` values in first-seen order.
    Each unit is requested with ``get_power_unit_storage`` up to two times.
    Returned units are written to ``fname_storage_unit``; units that failed
    twice are recorded in ``fname_storage_fail_u`` with a timestamp and the
    comment ``'Second fail'``.  After the loop
    ``retry_download_unit_storage`` is called.

    Existing units: 31543 (2019-02-10)
    """
    setup_power_unit_storage()
    storage_power_units = read_power_unit_storage(fname_power_unit_storage)
    # Order-preserving de-duplication of the unit numbers.
    unit_numbers = list(
        dict.fromkeys(storage_power_units['EinheitMastrNummer'].values.tolist()))
    log.info(f'Download {len(unit_numbers)} Stromspeichereinheit')

    for position, unit_number in enumerate(unit_numbers):
        for attempt in ('First', 'Second'):
            unit = get_power_unit_storage(unit_number)
            if unit is not None:
                write_to_csv(fname_storage_unit, unit)
                break
            log.exception(
                f'{attempt} download failed unit_storage ({position}): {unit_number}',
                exc_info=False)
        else:
            # Reached only when both attempts returned None: remember the
            # unit for the retry run.
            failure = pd.DataFrame({'EinheitMastrNummer': [unit_number]})
            failure['timestamp'] = str(datetime.datetime.now())
            failure['comment'] = 'Second fail'
            write_to_csv(fname_storage_fail_u, failure)

    retry_download_unit_storage()
def download_power_unit(power_unit_list_len=TOTAL_POWER_UNITS, pu_limit=API_MAX_DEMANDS, energy_carrier='None'):
    """Download StromErzeuger (power units) in batches and write them to CSV.

    Arguments
    ---------
    power_unit_list_len : int
        Maximum number of units to get.  Check the MaStR portal for the
        current number.
    pu_limit : int
        Number of units to get per call to the API (limited to 2000).
    energy_carrier : string
        Energy carrier (Energietraeger): None, AndereGase, Biomasse,
        Braunkohle, Erdgas, Geothermie, Grubengas, Kernenergie, Klaerschlamm,
        Mineraloelprodukte, NichtBiogenerAbfall, SolareStrahlungsenergie,
        Solarthermie, Speicher, Steinkohle, Waerme, Wind, Wasser.
        The carrier selects the output file; carriers without a dedicated
        file (including the default ``'None'``) go to ``fname_power_unit``.

    Notes
    -----
    A failing batch is logged with its traceback and skipped.

    Existing units:
    1822000 (2019-02-10)
    1844882 (2019-02-15)
    1847117 (2019-02-17)
    1864103 (2019-02-23)
    1887270 (2019-03-03)
    1965200 (2019-04-11)
    2328576 (2019-09-30)
    2331651 (2019-10-01)
    2359365 (2019-10-15)
    2363200 (2019-10-17)
    2468804 (2019-11-28)
    2487585 (2019-12-05) data-release/2.2.0
    2791367 (2020-03-21) data-release/2.2.1
    2812372 (2020-03-28) data-release/2.4.0
    3197769 (2020-08-17) data-release/2.5.0
    3200862 (2020-08-18) data-release/2.5.1
    3203715 (2020-08-19) data-release/2.5.2
    3204000 (2020-08-20) data-release/2.5.5
    3233056 (2020-08-20) data-release/2.7.0
    """
    log.info(f'Download MaStR power unit for energy carrier: {energy_carrier}')
    log.info(f'Number of expected power units: {power_unit_list_len}')

    if energy_carrier == 'Kernenergie':
        filename = fname_power_unit_nuclear
    elif energy_carrier == 'Wind':
        filename = fname_power_unit_wind
    elif energy_carrier == 'Wasser':
        filename = fname_power_unit_hydro
    elif energy_carrier == 'Biomasse':
        filename = fname_power_unit_biomass
    elif energy_carrier == 'SolareStrahlungsenergie':
        filename = fname_power_unit_solar
    elif energy_carrier == 'Speicher':
        filename = fname_power_unit_storage
    elif energy_carrier in ('Geothermie', 'Solarthermie', 'Grubengas',
                            'Klaerschlamm'):
        filename = fname_power_unit_gsgk
    # Membership test instead of "x == 'a' or 'b' or ...": the old chain was
    # always truthy (non-empty strings), so the else branch below could never
    # be reached and every other carrier ended up in the combustion file.
    elif energy_carrier in ('AndereGase', 'Braunkohle', 'Erdgas',
                            'NichtBiogenerAbfall', 'Steinkohle', 'Waerme'):
        filename = fname_power_unit_combustion
    else:
        filename = fname_power_unit
    log.info(f'Write to: {filename}')

    # If the list size is smaller than the limit, one batch is enough.
    if pu_limit > power_unit_list_len:
        pu_limit = power_unit_list_len
    # range() rejects a step of 0 and yields nothing for a negative step, so
    # there is nothing to download in either case.
    if pu_limit <= 0:
        return

    for offset in range(0, power_unit_list_len, pu_limit):
        try:
            # get_power_unit returns the start position together with the
            # batch; the returned position is the one that gets logged.
            start_from, power_unit = get_power_unit(offset, energy_carrier,
                                                    pu_limit)
            write_to_csv(filename, pd.DataFrame(power_unit))
            log.info(
                f'Download power_unit from {start_from}-{start_from + pu_limit}')
        except Exception:
            # "except Exception" instead of a bare except, so that
            # KeyboardInterrupt/SystemExit can still stop a long download.
            log.exception(f'Download failed power_unit from {offset}')
def download_unit_wind_eeg():
    """Download Windeinheit-EEG (unit-wind-eeg) using GetAnlageEegWind.

    1. Collect the distinct EegMastrNummer of Stromerzeugungseinheit-Wind
       (``fname_power_unit_wind``, after ``setup_power_unit_wind``).
    2. Collect the distinct EegMastrNummer of Windeinheit
       (``fname_wind_unit``).

    Both lists are merged and de-duplicated (first-seen order, entries whose
    string form is ``'nan'`` removed).  Each entry is requested with
    ``get_unit_wind_eeg`` and, if that returns ``None``, requested once
    more.  After the loop ``retry_download_unit_wind_eeg`` is called.

    Returns
    -------
    None
        Downloads are written to ``fname_wind_eeg``; entries that failed
        twice are recorded in ``fname_wind_fail_e`` with a timestamp and the
        comment ``'Second fail'``.
    """
    def unique_eeg_numbers(frame):
        # Distinct non-null EegMastrNummer values of `frame`, first-seen order.
        eeg_column = frame.dropna(subset=['EegMastrNummer'])['EegMastrNummer']
        return list(dict.fromkeys(eeg_column.values.tolist()))

    setup_power_unit_wind()
    from_power_units = unique_eeg_numbers(
        read_power_unit_wind(fname_power_unit_wind))
    log.info(
        f'Read {len(from_power_units)} unique EegMastrNummer from Stromerzeugungseinheit-Wind'
    )

    from_wind_units = unique_eeg_numbers(read_unit_wind(fname_wind_unit))
    log.info(f'Read {len(from_wind_units)} unique EegMastrNummer from Windeinheit')

    merged = dict.fromkeys(from_power_units + from_wind_units)
    eeg_numbers = [nr for nr in merged if str(nr) != 'nan']
    log.info(f'Download {len(eeg_numbers)} Windeinheit-EEG')

    for position, eeg_number in enumerate(eeg_numbers):
        unit_eeg = get_unit_wind_eeg(eeg_number)  # first attempt
        if unit_eeg is not None:
            write_to_csv(fname_wind_eeg, unit_eeg)
            continue
        log.exception(
            f'First download failed unit_wind_eeg ({position}): {eeg_number}',
            exc_info=False)

        unit_eeg = get_unit_wind_eeg(eeg_number)  # second attempt
        if unit_eeg is not None:
            write_to_csv(fname_wind_eeg, unit_eeg)
            continue

        # Remember the entry for the retry run.  The log message prints the
        # whole failure record, not just the number.
        eeg_fail = {'EegMastrNummer': [eeg_number]}
        log.exception(
            f'Second download failed unit_wind_eeg ({position}): {eeg_fail}',
            exc_info=False)
        failure = pd.DataFrame(eeg_fail)
        failure['timestamp'] = str(datetime.datetime.now())
        failure['comment'] = 'Second fail'
        write_to_csv(fname_wind_fail_e, failure)

    retry_download_unit_wind_eeg()
def download_unit_wind_permit():
    """Download Windeinheit-Genehmigung (wind permits).

    Uses the GetEinheitGenehmigung request.

    1. Collect the distinct GenMastrNummer of Stromerzeugungseinheit-Wind
       (``fname_power_unit_wind``, after ``setup_power_unit_wind``).
    2. Collect the distinct GenMastrNummer of Windeinheit
       (``fname_wind_unit``).

    Both lists are merged and de-duplicated (first-seen order, entries whose
    string form is ``'nan'`` removed).  For every permit, each entry of
    ``VerknuepfteEinheiten`` (linked units) is expanded into a table of
    units, the permit attributes are attached as constant columns and the
    result is handed to ``write_to_csv`` for ``fname_wind_permit``.

    Returns
    -------
    None
        Permits are written to ``fname_wind_permit``; permits that could not
        be downloaded or converted are recorded in ``fname_wind_fail_p`` with
        a timestamp and the comment ``'First fail'``.
    """
    def unique_permit_numbers(frame):
        # Distinct non-null GenMastrNummer values of `frame`, first-seen order.
        gen_column = frame.dropna(subset=['GenMastrNummer'])['GenMastrNummer']
        return list(dict.fromkeys(gen_column.values.tolist()))

    setup_power_unit_wind()
    mastr_list_1 = unique_permit_numbers(
        read_power_unit_wind(fname_power_unit_wind))
    log.info(
        f'Read {len(mastr_list_1)} unique GenMastrNummer from Stromerzeugungseinheit-Wind'
    )

    mastr_list_2 = unique_permit_numbers(read_unit_wind(fname_wind_unit))
    log.info(f'Read {len(mastr_list_2)} unique GenMastrNummer from Windeinheit')

    # NaN entries are removed here, so no further NA check is needed below.
    mastr_list = [
        nr for nr in dict.fromkeys(mastr_list_1 + mastr_list_2)
        if str(nr) != 'nan'
    ]
    log.info(f'Download {len(mastr_list)} Windeinheit-Genehmigung')

    for i, permit_number in enumerate(mastr_list):
        try:
            unit_wind_permit = get_unit_wind_permit(permit_number)
            for _, linked_units in unit_wind_permit.VerknuepfteEinheiten.items():
                df_new = pd.DataFrame.from_dict(linked_units)
                # DataFrame.size counts cells (rows x columns), so each
                # constant column is at least as long as df_new;
                # reindex() below aligns it to df_new's index again.
                n_values = df_new.size
                df = pd.DataFrame({
                    'GenMastrNummer': n_values * [unit_wind_permit.GenMastrNummer.iloc[0]],
                    'Datum': n_values * [unit_wind_permit.Datum.iloc[0]],
                    'Art': n_values * [unit_wind_permit.Art.iloc[0]],
                    # Commas are removed from the authority name
                    # (presumably to keep the CSV columns intact - confirm).
                    'Behoerde': n_values * [
                        unit_wind_permit.Behoerde.iloc[0].translate({ord(','): None})],
                    'Aktenzeichen': n_values * [unit_wind_permit.Aktenzeichen.iloc[0]],
                    'Frist': n_values * [unit_wind_permit.Frist.iloc[0]['Wert']],
                    'WasserrechtsNummer': n_values * [
                        unit_wind_permit.WasserrechtsNummer.iloc[0]],
                    'WasserrechtAblaufdatum': n_values * [
                        unit_wind_permit.WasserrechtAblaufdatum.iloc[0]['Wert']],
                    'Meldedatum': n_values * [unit_wind_permit.Meldedatum.iloc[0]],
                })
                df_all = pd.concat([df_new, df.reindex(df_new.index)], axis=1)
                # Written per linked-units entry, so a table left over from
                # an earlier permit can never be written again.
                write_to_csv(fname_wind_permit, df_all)
        except Exception:
            # "except Exception" instead of a bare except, so that
            # KeyboardInterrupt/SystemExit can still stop a long download.
            log.exception(
                f'First download failed unit_wind_permit ({i}): {permit_number}',
                exc_info=False)
            unit_fail = pd.DataFrame({'GenMastrNummer': [permit_number]})
            unit_fail['timestamp'] = str(datetime.datetime.now())
            unit_fail['comment'] = 'First fail'
            write_to_csv(fname_wind_fail_p, unit_fail)