Exemple #1
0
def temporal_validation(result_paths, ed_fueltype_yh, elec_factored_yh,
                        plot_criteria):
    """Plot modelled against national hourly electricity demand.

    National hourly electricity data is validated with the summed
    modelled hourly demand for all regions. Because the total annual
    modelled and real demands do not match (different data sources,
    Northern Ireland not included in the validation data) a
    correction factor is used.

    Two comparisons are requested from
    ``elec_national_data.compare_results``: one for the yeardays
    0 to 364 and one for four selected weeks of 2015 (one per season).

    Arguments
    ---------
    result_paths : dict
        Paths, the entry 'data_results_validation' is handed
        over to the plotting function
    ed_fueltype_yh : array
        Fuel type specific yh energy demand
    elec_factored_yh : array
        Data handed over as comparison partner of ``ed_fueltype_yh``
        (presumably the national electricity data after applying
        the correction factor, to be confirmed against the caller)
    plot_criteria : bool
        Criteria to show plots or not
    """
    def _compare(fig_name, yeardays):
        # Both plots only differ in file name and plotted days
        elec_national_data.compare_results(
            fig_name,
            result_paths['data_results_validation'],
            elec_factored_yh,
            ed_fueltype_yh,
            'all_submodels',
            yeardays,
            plot_crit=plot_criteria)

    # ----------------
    # Full year
    # ----------------
    _compare(
        'validation_temporal_electricity_8760h.pdf',
        list(range(365)))

    # ----------------
    # One week per season: ((month, day) start, (month, day) end),
    # the end day itself is not plotted
    # ----------------
    week_bounds = (
        ((1, 12), (1, 19)),     # winter (Jan)
        ((5, 11), (5, 18)),     # spring (May)
        ((7, 13), (7, 20)),     # summer (Jul)
        ((10, 12), (10, 19)))   # autumn (Oct)

    selected_days = []
    for (start_month, start_day), (end_month, end_day) in week_bounds:
        first_yearday = date_prop.date_to_yearday(
            2015, start_month, start_day)
        end_yearday = date_prop.date_to_yearday(
            2015, end_month, end_day)
        selected_days.extend(range(first_yearday, end_yearday))

    _compare(
        'validation_temporal_electricity_weeks_selection.pdf',
        selected_days)
def test_date_to_yearday():
    """Check the yearday returned for the 13th of June 2015

    The 13th of June is the 164th day of 2015. The expected
    value is one less, i.e. the function is expected to
    count yeardays starting from zero.
    """
    zero_based_yearday = 164 - 1

    assert date_prop.date_to_yearday(2015, 6, 13) == zero_based_yearday
def assign_hes_data_to_year(nr_of_appliances, hes_data, base_yr):
    """Assign HES data to every day of the base year

    For every date of the base year, the entry of ``hes_data``
    belonging to the daytype and month of this date is copied
    to the position of the date in the returned array.

    Arguments
    ----------
    nr_of_appliances : int
        Number of appliance types, used as size of the last array
        dimension (max 10 provided in original hes file)
    hes_data : array
        HES raw data, accessed as ``hes_data[daytype][month]``
    base_yr : int
        Base year to generate shapes (handed over to ``date``)

    Returns
    -------
    year_raw_values : array
        Energy data for every day in the base year for every
        appliance, shape (365, 24, nr_of_appliances)

    Note
    -----
    NOTE(review): the array has 365 day entries. Whether a leap
    base year fits depends on the yeardays returned by
    ``date_prop.date_to_yearday``, to be confirmed.
    """
    # Dimensions: yeardays, hours, appliances
    array_shape = (365, 24, nr_of_appliances)
    year_raw_values = np.zeros(array_shape, dtype=float)

    # All dates from the first to the last day of the base year
    first_day = date(base_yr, 1, 1)
    last_day = date(base_yr, 12, 31)
    all_dates = date_prop.fullyear_dates(start=first_day, end=last_day)

    for current_date in all_dates:
        time_info = current_date.timetuple()

        month_index = date_prop.get_month_from_yeraday(
            time_info.tm_year, time_info.tm_yday)

        day_index = date_prop.date_to_yearday(
            time_info.tm_year, time_info.tm_mon, time_info.tm_mday)

        daytype = date_prop.get_weekday_type(current_date)

        # Copy the matching day from the HES raw data
        year_raw_values[day_index] = hes_data[daytype][month_index]

    return year_raw_values
def run(
        path_files,
        path_out_files,
        crit_missing_values=100,
        crit_nr_of_zeros=500,
        nr_daily_zeros=10
    ):
    """Iterate weather data from MIDAS and generate annual hourly
    temperature data files for every weather station by
    interpolating missing values

    The data are obtained from the Centre for Environmental Data Analysis

    Download link: http://data.ceda.ac.uk/badc/ukmo-midas/data/WH/yearly_files/
    Metadata: http://badc.nerc.ac.uk/artefacts/badc_datadocs/ukmo-midas/WH_Table.html
    Weather Stations information: http://badc.nerc.ac.uk/search/midas_stations/
    http://badc.nerc.ac.uk/cgi-bin/midas_stations/search_by_name.cgi.py?name=&minyear=&maxyear=

    The values are then written to a created folder as follows:

        year__stationname.txt

    Every written file contains 365 rows (days) with 24 comma
    separated hourly values each.

    Arguments
    ---------
    path_files : str
        Path to folder with original weather data files
    path_out_files : str
        Path to folder where the cleaned data are stored. An
        already existing folder is deleted and created again
    crit_missing_values : int
        If a station has more missing values than this number,
        the weather station is discarded
    crit_nr_of_zeros : int
        If a station has more zero values per year than this
        number, the weather station is discarded
    nr_daily_zeros : int
        If the value returned by ``count_sequence_of_zeros``
        (zeros which follow each other) is larger than this
        number, the weather station is discarded

    Raises
    ------
    Exception
        If a station still contains np.nan values after interpolation

    Note
    -----
        - In case of a leap year the 29 of February is ignored and
          all later days are moved forward by one day, so that
          every year has 8760 hours
        - Every station array starts with zeros. Hours for which
          no row is provided therefore count as zero values
    """
    print("... starting to clean original weather files")

    # Stations which are outside of the uk and are ignored
    stations_outside_UK = {
        1605, # St. Helena
        1585, # Gibraltar
        1609} # Falkland Islands

    # Create empty out folder to store station specific csv
    if os.path.isdir(path_out_files):
        basic_functions.delete_folder(path_out_files)
    os.mkdir(path_out_files)

    # Placeholder value for missing entry
    placeholder_value = np.nan

    # Read all files and sort according to year
    all_annual_raw_files = sorted(os.listdir(path_files))

    # Annual temperature file
    for file_name in all_annual_raw_files:
        print(" ")
        print("... reading csv file: " + str(file_name), flush=True)
        print(" ")
        path_to_csv = os.path.join(path_files, file_name)

        temp_stations = {}

        with open(path_to_csv, 'r') as csvfile:
            read_lines = csv.reader(csvfile, delimiter=',')

            for row in read_lines:

                # First column: "<year>-<month>-<day> <hour>..."
                date_measurement = row[0].split(" ")
                date_fields = date_measurement[0].split("-")
                year = int(date_fields[0])
                month = int(date_fields[1])
                day = int(date_fields[2])
                hour = int(date_measurement[1][:2])

                # Weather station id
                station_id = int(row[5])

                # Ignore station outside uk or leap year day
                if station_id in stations_outside_UK or (month == 2 and day == 29):
                    continue

                yearday = date_prop.date_to_yearday(year, month, day)

                # Only the days after the ignored 29. of Feb are moved
                # forward. Moving January and February as well would
                # result in a negative index for the 1. of January,
                # which wraps around to the hours of the 31. of December
                if month > 2 and date_prop.is_leap_year(year):
                    yearday -= 1

                year_hour = (yearday * 24) + hour

                # Air temperature in Degrees Celcius
                raw_air_temp = row[35]
                if not raw_air_temp.strip(): # If no data point
                    air_temp = placeholder_value
                else:
                    air_temp = float(raw_air_temp)

                # Add weather station if not already added to dict
                if station_id not in temp_stations:
                    temp_stations[station_id] = np.zeros((8760), dtype="float")

                temp_stations[station_id][year_hour] = air_temp

        # ------------------------------------------
        # Interpolate missing values (np.nan)
        # ------------------------------------------
        temp_stations_cleaned_reshaped = {}

        for station, station_temps in temp_stations.items():

            # Number of empty values (nans is used as boolean mask below)
            nans, x = nan_helper(station_temps)
            nr_of_nans = int(np.count_nonzero(nans))

            # Number of zeros (np.nan entries never compare equal to zero)
            nr_of_zeros = int(np.count_nonzero(station_temps == 0))

            # Count number of zeros which follow
            max_cnt_zeros = count_sequence_of_zeros(station_temps)

            if nr_of_nans > crit_missing_values or nr_of_zeros > crit_nr_of_zeros or max_cnt_zeros > nr_daily_zeros:
                print("Zeros in sequence: {} nr_of_nans: {} nr_of_zeros: {} Ignored station: {} {}".format(
                    max_cnt_zeros,
                    nr_of_nans,
                    nr_of_zeros,
                    station,
                    year), flush=True)
                continue

            # Interpolate missing np.nan values
            station_temps[nans] = np.interp(
                x(nans),
                x(~nans),
                station_temps[~nans])

            # Test if still np.nan value
            if np.isnan(station_temps).any():
                raise Exception("Still has np.nan entries")

            # Replace with day, hour array
            temp_stations_cleaned_reshaped[station] = station_temps.reshape(365, 24)

        # Write temperature data out to csv file
        for station_name, temp_values in temp_stations_cleaned_reshaped.items():

            out_file_name = "{}__{}.{}".format(year, station_name, "txt")

            path_out = os.path.join(path_out_files, out_file_name)

            np.savetxt(
                path_out,
                temp_values,
                delimiter=",")
        print("--Number of stations '{}'".format(len(temp_stations_cleaned_reshaped)))

    print("... finished cleaning weather data")
Exemple #5
0
def read_raw_elec_2015(path_to_csv, year=2015):
    """Read national half hourly electricity values provided
    in MW and store them as hourly values in GWh

    Arguments
    ---------
    path_to_csv : str
        Path to csv file. The first row is skipped as heading
    year : int
        Year of data

    Returns
    -------
    elec_data_indo : array
        Hourly INDO electricity in GWh (INDO - National Demand),
        shape (365, 24)
    elec_data_itsdo : array
        Hourly ITSDO electricity in GWh (Transmission System Demand),
        shape (365, 24)

    Note
    -----
    Two rows which follow each other are summed to one hourly
    value. The row is stored at the yearday of the second row, the
    hour is a counter which starts again after 24 hours.

    The csv file must therefore provide 48 rows for every day. As
    stated by the original data preparation notes, the days of the
    changing of the clocks (29 March, 25 October) need to be
    corrected in the csv file beforehand.

    Source
    ------
    http://www2.nationalgrid.com/uk/Industry-information/electricity-transmission-operational-data/
    For more information on INDO and ISTDO see DemandData Field Descriptions file:
    http://www2.nationalgrid.com/WorkArea/DownloadAsset.aspx?id=8589934632

    National Demand is calculated as a sum
    of generation based on National Grid
    operational generation metering
    """
    array_shape = (365, 24)
    elec_data_indo = np.zeros(array_shape, dtype="float")
    elec_data_itsdo = np.zeros(array_shape, dtype="float")

    with open(path_to_csv, 'r') as csvfile:
        rows = csv.reader(csvfile, delimiter=',')
        next(rows)  # Skip headings

        hour = 0

        # (INDO, ITSDO) of the first half hour as long as
        # the second half hour is not read
        first_half_hour = None

        for row in rows:
            # First column: "<day>-<month name>-..."
            date_fields_month = row[0].split("-")[1]
            month = basic_functions.get_month_from_string(date_fields_month)
            day = int(row[0].split("-")[0])

            # Get yearday
            yearday = date_prop.date_to_yearday(year, month, day)

            if first_half_hour is None:
                # Remember first half hour
                first_half_hour = (
                    float(row[2]),   # INDO - National Demand
                    float(row[4]))   # Transmission System Demand
                continue

            # Sum value of first and second half hour
            indo_first, itsdo_first = first_half_hour
            first_half_hour = None
            hour_demand_indo = indo_first + float(row[2])
            hour_demand_itsdo = itsdo_first + float(row[4])

            # Convert MW to GWh. The 0.5 is handed over because the
            # input is the sum of two half hourly MW measurements
            # (presumably the duration in hours, to be confirmed
            # in ``conversions.mw_to_gwh``)
            elec_data_indo[yearday, hour] = conversions.mw_to_gwh(
                hour_demand_indo, 0.5)
            elec_data_itsdo[yearday, hour] = conversions.mw_to_gwh(
                hour_demand_itsdo, 0.5)

            # Next hour, start again after a full day
            hour = (hour + 1) % 24

    return elec_data_indo, elec_data_itsdo
Exemple #6
0
def run(path_files, path_out_files, path_weather_stations, crit_min_max=False):
    """Iterate weather data from MIDAS and generate daily
    temperature files (t_min, t_max) for every year

    The data are obtained from the Centre for Environmental Data Analysis

    Download link: http://data.ceda.ac.uk/badc/ukmo-midas/data/WH/yearly_files/

    Metadata: http://badc.nerc.ac.uk/artefacts/badc_datadocs/ukmo-midas/WH_Table.html

    Weather Stations information: http://badc.nerc.ac.uk/search/midas_stations/
    http://badc.nerc.ac.uk/cgi-bin/midas_stations/search_by_name.cgi.py?name=&minyear=&maxyear=

    For every annual input file the following files are written:

        <year>_stations.csv   (station_id, latitude, longitude)
        <year>_t_min.npy      (array with 365 values per station)
        <year>_t_max.npy      (array with 365 values per station)

    The rows of the .npy arrays are in the same order as the
    rows of the stations csv file.

    Arguments
    ---------
    path_files : str
        Path to folder with original weather data files
    path_out_files : str
        Path to folder where the cleaned data are stored. An
        already existing folder is deleted and created again
    path_weather_stations : str
        Path handed over to ``data_loader.read_weather_stations_raw``
        to load the coordinates of the weather stations
    crit_min_max : bool
        If True, t_min and t_max are updated with every valid
        measurement of a day (daily minimum and maximum). If
        False, both contain the first valid measurement of every day

    Note
    -----
        - In case of a leap year the 29 of February is ignored and
          all later days are moved forward by one day
        - Stations without coordinates are not written out
        - NOTE(review): the daily arrays start with zeros and np.nan
          measurements are never stored. Days without any valid
          measurement therefore stay 0.0 and are not found by the
          np.nan check below. Confirm whether np.nan was
          intended as start value
    """
    logging.info("... starting to clean original weather files")

    # Load coordinates of weather stations
    weather_stations = data_loader.read_weather_stations_raw(
        path_weather_stations)

    # Stations which are outside of the uk and are ignored
    stations_outside_UK = {
        1605,  # St. Helena
        1585,  # Gibraltar
        1609   # Falkland Islands
    }

    # Create empty out folder to store station specific files
    if os.path.isdir(path_out_files):
        basic_functions.delete_folder(path_out_files)
    os.mkdir(path_out_files)

    # Read all files and sort according to year
    all_annual_raw_files = sorted(os.listdir(path_files))

    # Annual temperature file
    for file_name in all_annual_raw_files:
        logging.info("... reading in file: %s", file_name)

        path_to_csv = os.path.join(path_files, file_name)

        # Yeardays with at least one valid measurement per station
        temp_stations = {}
        temp_stations_min_max = defaultdict(dict)

        # Stays None if the file does not contain any row
        year = None

        with open(path_to_csv, 'r') as csvfile:
            read_lines = csv.reader(csvfile, delimiter=',')

            for row in read_lines:
                # First column: "<year>-<month>-<day> <hour>..."
                date_measurement = row[0].split(" ")
                date_fields = date_measurement[0].split("-")
                year = int(date_fields[0])
                month = int(date_fields[1])
                day = int(date_fields[2])

                # Weather station id
                station_id = int(row[5])

                # Ignore station outside uk or leap year day
                if station_id in stations_outside_UK or (month == 2
                                                         and day == 29):
                    continue

                yearday = date_prop.date_to_yearday(year, month, day)

                # Only the days after the ignored 29. of Feb are moved
                # forward. Moving January and February as well would
                # result in a negative index for the 1. of January,
                # which wraps around to the 31. of December
                if month > 2 and date_prop.is_leap_year(year):
                    yearday -= 1

                # Add weather station if not already added. This must
                # not depend on crit_min_max, otherwise the lookups
                # below fail with a KeyError for crit_min_max=False
                if station_id not in temp_stations:
                    temp_stations_min_max[station_id]['t_min'] = np.zeros(
                        (365), dtype="float")
                    temp_stations_min_max[station_id]['t_max'] = np.zeros(
                        (365), dtype="float")
                    temp_stations[station_id] = set()

                # Air temperature in Degrees Celcius
                raw_air_temp = row[35]
                if not raw_air_temp.strip():  # If no data point
                    continue
                air_temp = float(raw_air_temp)

                t_min = temp_stations_min_max[station_id]['t_min']
                t_max = temp_stations_min_max[station_id]['t_max']

                if yearday not in temp_stations[station_id]:
                    # First valid measurement of this day
                    t_min[yearday] = air_temp
                    t_max[yearday] = air_temp
                    temp_stations[station_id].add(yearday)
                elif crit_min_max:
                    # Update min and max daily temperature
                    if air_temp < t_min[yearday]:
                        t_min[yearday] = air_temp
                    if air_temp > t_max[yearday]:
                        t_max[yearday] = air_temp

        if year is None:
            # Without any row there is no year to name the output files
            logging.warning("... no data found in file: %s", file_name)
            continue

        # ------------------------
        # Delete weather stations with missing daily data inputs
        # ------------------------
        stations_to_delete = []
        for station_id, min_max in temp_stations_min_max.items():

            nans, _ = nan_helper(min_max['t_min'])
            nr_of_nans_t_min = int(np.count_nonzero(nans))

            nans, _ = nan_helper(min_max['t_max'])
            nr_of_nans_t_max = int(np.count_nonzero(nans))

            if nr_of_nans_t_min > 0 or nr_of_nans_t_max > 0:
                print("Station '{}' contains {} {} .nan values and is deleted".
                      format(station_id, nr_of_nans_t_min, nr_of_nans_t_max))
                stations_to_delete.append(station_id)

        print("Number of stations to delete: {}".format(
            len(stations_to_delete)))
        for station_id in stations_to_delete:
            del temp_stations_min_max[station_id]

        # --------------------
        # Write out files
        # --------------------
        path_out_stations = os.path.join(path_out_files,
                                         '{}_stations.csv'.format(str(year)))
        path_out_t_min = os.path.join(path_out_files,
                                      "{}_t_min.npy".format(str(year)))
        path_out_t_max = os.path.join(path_out_files,
                                      "{}_t_max.npy".format(str(year)))

        # Check if weather station is defined and collect coordinates
        stations_to_delete = []
        out_list = []
        for name in temp_stations_min_max:
            try:
                latitude = weather_stations[name]['latitude']
                longitude = weather_stations[name]['longitude']
            except KeyError:
                print(
                    "... no coordinates are available for weather station '{}'"
                    .format(name))
                stations_to_delete.append(name)
            else:
                out_list.append([name, latitude, longitude])

        print("... number of stations to delete: {}".format(
            len(stations_to_delete)))
        for name in stations_to_delete:
            del temp_stations_min_max[name]

        # Write stations. The reshape keeps three columns also if no
        # station is left (an empty array has otherwise no columns)
        df = pd.DataFrame(np.array(out_list).reshape(-1, 3),
                          columns=['station_id', 'latitude', 'longitude'])
        df.to_csv(path_out_stations, index=False)

        # Write temperatures (same station order as in the csv file)
        stations_t_min = np.array(
            [min_max['t_min'] for min_max in temp_stations_min_max.values()])
        stations_t_max = np.array(
            [min_max['t_max'] for min_max in temp_stations_min_max.values()])

        np.save(path_out_t_min, stations_t_min)
        np.save(path_out_t_max, stations_t_max)

    logging.info("... finished cleaning weather data")