def emdat_to_impact(emdat_file_csv, year_range=None, countries=None,
                    hazard_type_emdat=None, hazard_type_climada=None,
                    reference_year=0, imp_str="Total damage ('000 US$)"):
    """function to load EM-DAT data return impact per event

    Parameters:
        emdat_file_csv (str): Full path to EMDAT-file (CSV), e.g.:
            emdat_file_csv = os.path.join(SYSTEM_DIR, 'emdat_201810.csv')

        hazard_type_emdat (str): Hazard (sub-)type according to EMDAT terminology,
            e.g. 'Tropical cyclone' for tropical cyclone
        OR
        hazard_type_climada (str): Hazard type CLIMADA abbreviation,
            e.g. 'TC' for tropical cyclone
    Optional parameters:
        year_range (list with 2 integers): start and end year, e.g. [1980, 2017]
            default: None --> take year range from EM-DAT file
        countries (list of str): country ISO3-codes or names, e.g. ['JAM'].
            Set to None or ['all'] for all countries (default)

        reference_year (int): reference year of exposures. Impact is scaled
            proportional to GDP to the value of the reference year. No scaling
            for reference_year=0 (default)
        imp_str (str): Column name of impact metric in EMDAT CSV,
            default = "Total damage ('000 US$)"

    Returns:
        impact_instance (instance of climada.engine.Impact):
            impact object of same format as output from CLIMADA
            impact computation
            scaled with GDP to reference_year if reference_year is not equal to 0,
            i.e. in units of 1000 current US$ for the column "Total damage ('000 US$) scaled".
            impact_instance.eai_exp holds expected annual impact for each country.
            impact_instance.coord_exp holds rough central coordinates for each country.
        countries (list): ISO3-codes of countries in same order as in impact_instance.eai_exp
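
    Example (illustrative sketch only; assumes an EM-DAT CSV export such as
        'emdat_201810.csv' is available at the given path):
        >>> impact_emdat, countries = emdat_to_impact(
        ...     'emdat_201810.csv', hazard_type_climada='TC',
        ...     year_range=[1980, 2017], countries=['JAM'])
        >>> impact_emdat.aai_agg  # expected annual damage over the period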
    """
    # Mapping of hazard type between EM-DAT and CLIMADA:
    if not hazard_type_climada:
        if not hazard_type_emdat:
            LOGGER.error(
                'Either hazard_type_climada or hazard_type_emdat needs to be defined.'
            )
            return None
        if hazard_type_emdat == 'Tropical cyclone':
            hazard_type_climada = 'TC'
        elif hazard_type_emdat == 'Drought':
            hazard_type_climada = 'DR'
        elif hazard_type_emdat == 'Landslide':
            hazard_type_climada = 'LS'
        elif hazard_type_emdat == 'Riverine flood':
            hazard_type_climada = 'RF'
        elif hazard_type_emdat in [
                'Wildfire', 'Forest Fire', 'Land fire (Brush, Bush, Pasture)'
        ]:
            hazard_type_climada = 'BF'
        elif hazard_type_emdat == 'Extra-tropical storm':
            hazard_type_climada = 'WS'
    elif not hazard_type_emdat:
        if hazard_type_climada == 'TC':
            hazard_type_emdat = 'Tropical cyclone'
        elif hazard_type_climada == 'DR':
            hazard_type_emdat = 'Drought'
        elif hazard_type_climada == 'LS':
            hazard_type_emdat = 'Landslide'
        elif hazard_type_climada == 'RF':
            hazard_type_emdat = 'Riverine flood'
        elif hazard_type_climada == 'BF':
            hazard_type_emdat = 'Wildfire'
        elif hazard_type_climada == 'WS':
            hazard_type_emdat = 'Extra-tropical storm'

    # Initiate Impact-instance:
    impact_instance = Impact()

    impact_instance.tag = dict()
    impact_instance.tag['haz'] = TagHaz(haz_type=hazard_type_climada,
                                        file_name=emdat_file_csv,
                                        description='EM-DAT impact, direct import')
    impact_instance.tag['exp'] = Tag(file_name=emdat_file_csv,
                                     description='EM-DAT impact, direct import')
    impact_instance.tag['if_set'] = Tag(file_name=None, description=None)

    if not countries or countries == ['all']:
        countries = emdat_countries_by_hazard(hazard_type_emdat, emdat_file_csv,
                                              ignore_missing=True, verbose=True)[0]
    elif isinstance(countries, str):
        countries = [countries]
    # Load EM-DAT impact data by event:
    em_data = emdat_impact_event(countries, hazard_type_emdat, emdat_file_csv,
                                 year_range, reference_year=reference_year)
    if em_data.empty:
        return impact_instance, countries
    impact_instance.event_id = np.array(em_data.index, int)
    impact_instance.event_name = list(em_data['Disaster No.'])

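    # Build event dates: default each event to 1 January of its year, then
    # overwrite with the 'Start date' entry where it can be parsed, either as
    # 'mm/yyyy' (last 7 characters) or as the full 'dd/mm/yyyy' format.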
    date_list = list()
    for year in list(em_data['year']):
        date_list.append(datetime.toordinal(datetime.strptime(str(year),
                                                              '%Y')))
    boolean_warning = True
    for idx, datestr in enumerate(list(em_data['Start date'])):
        try:
            date_list[idx] = datetime.toordinal(
                datetime.strptime(datestr[-7:], '%m/%Y'))
        except ValueError:
            if boolean_warning:
                LOGGER.warning('EM-DAT CSV contains invalid time formats')
                boolean_warning = False
        try:
            date_list[idx] = datetime.toordinal(
                datetime.strptime(datestr, '%d/%m/%Y'))
        except ValueError:
            if boolean_warning:
                LOGGER.warning('EM-DAT CSV contains invalid time formats')
                boolean_warning = False

    impact_instance.date = np.array(date_list, int)

    impact_instance.crs = DEF_CRS

    if reference_year == 0:
        impact_instance.at_event = np.array(em_data[imp_str])
    else:
        impact_instance.at_event = np.array(em_data[imp_str + " scaled"])
    if not year_range:
        year_range = [em_data['year'].min(), em_data['year'].max()]
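    # Each event gets a frequency of 1 / (number of years in year_range), so
    # that aai_agg below equals the mean annual damage over the period.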
    impact_instance.frequency = np.ones(
        em_data.shape[0]) / (1 + np.diff(year_range))
    impact_instance.tot_value = 0
    impact_instance.aai_agg = sum(impact_instance.at_event *
                                  impact_instance.frequency)
    impact_instance.unit = 'USD'
    impact_instance.imp_mat = []

    # init rough exposure with central point per country
    shp = shapereader.natural_earth(resolution='110m',
                                    category='cultural',
                                    name='admin_0_countries')
    shp = shapefile.Reader(shp)
    countries_reg_id = list()
    countries_lat = list()
    countries_lon = list()
    impact_instance.eai_exp = np.zeros(
        len(countries))  # empty: damage at exposure
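    # For each country, look up the matching record in the Natural Earth
    # admin-0 shapefile via its alpha-3 code and use the midpoint of the
    # country's bounding box as a rough single-point exposure coordinate.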
    for idx, cntry in enumerate(countries):
        try:
            cntry = iso_cntry.get(cntry).alpha3
        except KeyError:
            LOGGER.error('Country not found in iso_country: %s', cntry)
        cntry_boolean = False
        for rec_i, rec in enumerate(shp.records()):
            if rec[9].casefold() == cntry.casefold():
                bbox = shp.shapes()[rec_i].bbox
                cntry_boolean = True
                break
        if cntry_boolean:
            countries_lat.append(np.mean([bbox[1], bbox[3]]))
            countries_lon.append(np.mean([bbox[0], bbox[2]]))
        else:
            countries_lat.append(np.nan)
            countries_lon.append(np.nan)
        try:
            countries_reg_id.append(int(iso_cntry.get(cntry).numeric))
        except KeyError:
            countries_reg_id.append(0)
        df_tmp = em_data[em_data['ISO'].str.contains(cntry)]
        if reference_year == 0:
            impact_instance.eai_exp[idx] = sum(np.array(df_tmp[imp_str])
                                               * impact_instance.frequency[0])
        else:
            impact_instance.eai_exp[idx] = sum(np.array(df_tmp[imp_str + " scaled"])
                                               * impact_instance.frequency[0])

    impact_instance.coord_exp = np.stack([countries_lat, countries_lon],
                                         axis=1)

    return impact_instance, countries


def emdat_to_impact(emdat_file_csv,
                    hazard_type_climada,
                    year_range=None,
                    countries=None,
                    hazard_type_emdat=None,
                    reference_year=None,
                    imp_str="Total Damages"):
    """function to load EM-DAT data return impact per event

    Parameters:
        emdat_file_csv (str): Full path to EMDAT-file (CSV), e.g.:
            emdat_file_csv = SYSTEM_DIR.joinpath('emdat_201810.csv')
        hazard_type_climada (str): Hazard type CLIMADA abbreviation,
            e.g. 'TC' for tropical cyclone

    Optional parameters:
        hazard_type_emdat (list or str): List of Disaster (sub-)types according
            to EMDAT terminology, e.g.:
            Animal accident, Drought, Earthquake, Epidemic, Extreme temperature,
            Flood, Fog, Impact, Insect infestation, Landslide, Mass movement (dry),
            Storm, Volcanic activity, Wildfire;
            Coastal Flooding, Convective Storm, Riverine Flood, Tropical cyclone,
            Tsunami, etc.;
            OR CLIMADA hazard type abbreviations, e.g. TC, BF, etc.
            If not given, it is deduced from hazard_type_climada.
        year_range (list with 2 integers): start and end year e.g. [1980, 2017]
            default: None --> take year range from EM-DAT file
        countries (list of str): country ISO3-codes or names, e.g. ['JAM'].
            Set to None or ['all'] for all countries (default)
        reference_year (int): reference year of exposures. Impact is scaled
            proportional to GDP to the value of the reference year. No scaling
            if reference_year is None or 0 (default: None)
        imp_str (str): Column name of impact metric in EMDAT CSV,
            default = "Total Damages ('000 US$)"

    Returns:
        impact_instance (instance of climada.engine.Impact):
            impact object of same format as output from CLIMADA
            impact computation.
            Values are scaled with GDP to reference_year if reference_year is given,
            i.e. in current US$ for the column "Total Damages ('000 US$) scaled"
            (a factor of 1000 is applied).
            impact_instance.eai_exp holds expected annual impact for each country.
            impact_instance.coord_exp holds rough central coordinates for each country.
        countries (list): ISO3-codes of countries in same order as in impact_instance.eai_exp
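
    Example (illustrative sketch only; assumes an EM-DAT CSV export such as
        'emdat_201810.csv' is available at the given path):
        >>> impact_emdat, countries = emdat_to_impact(
        ...     'emdat_201810.csv', 'TC', countries=['JAM'],
        ...     year_range=[1980, 2017], imp_str="Total Damages")
        >>> impact_emdat.aai_agg  # expected annual damage over the period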
    """
    if "Total Damages" in imp_str:
        imp_str = "Total Damages ('000 US$)"
    elif "Insured Damages" in imp_str:
        imp_str = "Insured Damages ('000 US$)"
    elif "Reconstruction Costs" in imp_str:
        imp_str = "Reconstruction Costs ('000 US$)"
    imp_str = VARNAMES_EMDAT[max(VARNAMES_EMDAT.keys())][imp_str]
    if not hazard_type_emdat:
        hazard_type_emdat = [hazard_type_climada]
    if reference_year == 0:
        reference_year = None
    # Initiate Impact-instance:
    impact_instance = Impact()

    impact_instance.tag = dict()
    impact_instance.tag['haz'] = TagHaz(
        haz_type=hazard_type_climada,
        file_name=emdat_file_csv,
        description='EM-DAT impact, direct import')
    impact_instance.tag['exp'] = Tag(
        file_name=emdat_file_csv, description='EM-DAT impact, direct import')
    impact_instance.tag['if_set'] = Tag(file_name=None, description=None)

    # Load EM-DAT impact data by event:
    em_data = emdat_impact_event(emdat_file_csv,
                                 countries=countries,
                                 hazard=hazard_type_emdat,
                                 year_range=year_range,
                                 reference_year=reference_year,
                                 imp_str=imp_str,
                                 version=max(VARNAMES_EMDAT.keys()))

    if isinstance(countries, str):
        countries = [countries]
    elif not countries:
        countries = emdat_countries_by_hazard(emdat_file_csv,
                                              year_range=year_range,
                                              hazard=hazard_type_emdat)[0]

    if em_data.empty:
        return impact_instance, countries
    impact_instance.event_id = np.array(em_data.index, int)
    impact_instance.event_name = list(em_data[VARNAMES_EMDAT[max(
        VARNAMES_EMDAT.keys())]['Dis No']])

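    # Build event dates: default each event to 1 January of its year, then
    # refine with the 'Start Year'/'Start Month'/'Start Day' columns where
    # available, substituting 1 for missing months and days.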
    date_list = list()
    for year in list(em_data['Year']):
        date_list.append(datetime.toordinal(datetime.strptime(str(year),
                                                              '%Y')))
    if 'Start Year' in em_data.columns and 'Start Month' in em_data.columns \
            and 'Start Day' in em_data.columns:
        idx = 0
        for year, month, day in zip(em_data['Start Year'],
                                    em_data['Start Month'],
                                    em_data['Start Day']):
            if np.isnan(year):
                idx += 1
                continue
            if np.isnan(month):
                month = 1
            if np.isnan(day):
                day = 1
            date_list[idx] = datetime.toordinal(
                datetime.strptime('%02i/%02i/%04i' % (day, month, year),
                                  '%d/%m/%Y'))
            idx += 1
    impact_instance.date = np.array(date_list, int)
    impact_instance.crs = DEF_CRS

    if not reference_year:
        impact_instance.at_event = np.array(em_data["impact"])
    else:
        impact_instance.at_event = np.array(em_data["impact_scaled"])
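    # Events without a reported damage value come in as NaN; set them to zero
    # in at_event so downstream sums stay finite.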
    impact_instance.at_event[np.isnan(impact_instance.at_event)] = 0
    if not year_range:
        year_range = [em_data['Year'].min(), em_data['Year'].max()]
    impact_instance.frequency = np.ones(
        em_data.shape[0]) / (1 + np.diff(year_range))
    impact_instance.tot_value = 0
    impact_instance.aai_agg = np.nansum(impact_instance.at_event *
                                        impact_instance.frequency)
    impact_instance.unit = 'USD'
    impact_instance.imp_mat = []

    # init rough exposure with central point per country
    shp = shapereader.natural_earth(resolution='110m',
                                    category='cultural',
                                    name='admin_0_countries')
    shp = shapefile.Reader(shp)
    countries_reg_id = list()
    countries_lat = list()
    countries_lon = list()
    impact_instance.eai_exp = np.zeros(
        len(countries))  # empty: damage at exposure
    for idx, cntry in enumerate(countries):
        try:
            cntry = iso_cntry.get(cntry).alpha3
        except KeyError:
            LOGGER.error('Country not found in iso_country: %s', cntry)
        cntry_boolean = False
        for rec_i, rec in enumerate(shp.records()):
            if rec[9].casefold() == cntry.casefold():
                bbox = shp.shapes()[rec_i].bbox
                cntry_boolean = True
                break
        if cntry_boolean:
            countries_lat.append(np.mean([bbox[1], bbox[3]]))
            countries_lon.append(np.mean([bbox[0], bbox[2]]))
        else:
            countries_lat.append(np.nan)
            countries_lon.append(np.nan)
        try:
            countries_reg_id.append(int(iso_cntry.get(cntry).numeric))
        except KeyError:
            countries_reg_id.append(0)
        df_tmp = em_data[em_data[VARNAMES_EMDAT[max(
            VARNAMES_EMDAT.keys())]['ISO']].str.contains(cntry)]
        if not reference_year:
            impact_instance.eai_exp[idx] = sum(
                np.array(df_tmp["impact"]) * impact_instance.frequency[0])
        else:
            impact_instance.eai_exp[idx] = sum(
                np.array(df_tmp["impact_scaled"]) *
                impact_instance.frequency[0])

    impact_instance.coord_exp = np.stack([countries_lat, countries_lon],
                                         axis=1)
    return impact_instance, countries