def get_ts_from_complete_data(complete_data, mask, keys, dates=None):
    """
    Compute a spatially averaged time series for each selected dataset.

    Parameters
    ----------
    complete_data : dict
        For every key, ``complete_data[key][0]`` is indexed with a boolean
        array built from ``complete_data[key][1] == date``, so both are
        expected to be ndarrays of equal length (raster paths and their dates).
    mask : str
        Raster opened with ``becgis.open_as_array``; pixels that are NaN in
        this raster are excluded from the averages.
    keys : iterable or None
        Keys of `complete_data` to process. ``None`` selects all keys.
    dates : optional
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    tss : dict
        ``{key: (common_dates, values)}`` where `values` is a float64 ndarray
        holding one spatial mean per entry of `common_dates`.
    """
    # Identity test: `keys == None` is evaluated element-wise for ndarrays.
    if keys is None:
        keys = list(complete_data.keys())

    common_dates = becgis.common_dates([complete_data[key][1] for key in keys])
    becgis.assert_proj_res_ndv([complete_data[key][0] for key in keys])

    MASK = becgis.open_as_array(mask, nan_values=True)
    # The mask never changes, so its NaN pattern is computed once.
    outside_mask = np.isnan(MASK)

    tss = dict()

    for key in keys:

        fhs, fh_dates = complete_data[key][0], complete_data[key][1]
        means = []

        for date in common_dates:

            tif = fhs[fh_dates == date][0]

            DATA = becgis.open_as_array(tif, nan_values=True)
            # No-data inside the mask counts as zero ...
            DATA[np.isnan(DATA)] = 0.0
            # ... while pixels outside the mask are ignored by nanmean.
            DATA[outside_mask] = np.nan

            means.append(np.nanmean(DATA))

        tss[key] = (common_dates, np.array(means, dtype=np.float64))

    return tss
def sum_ts(flow_csvs):
    """
    Sum several flow time series over the dates they have in common.

    Parameters
    ----------
    flow_csvs : iterable
        Items passed one by one to ``pwv.create_dict_entry``, which must
        return ``(coordinates, flow_ts, station_name, unit)`` with `flow_ts`
        a sequence of ``(date, value)`` pairs.

    Returns
    -------
    data : ndarray
        Sum of all series, one value per entry of `common_dates`.
    common_dates : ndarray
        Dates present in every series, as returned by ``becgis.common_dates``.

    Notes
    -----
    Series whose unit is ``'m3/s'`` are multiplied by the number of seconds
    in the month of each date and divided by ``1000**3`` before summing.
    Series with any other unit are used as they are.
    """
    flows = list()
    dates = list()

    for cv in flow_csvs:

        coordinates, flow_ts, station_name, unit = pwv.create_dict_entry(cv)
        flow_dates, flow_values = list(zip(*flow_ts))
        flow_dates = becgis.convert_datetime_date(flow_dates)
        # zip() yields tuples; boolean-mask indexing below needs an ndarray.
        flow_values = np.asarray(flow_values)
        if unit == 'm3/s':
            flow_values = np.array([
                flow_values[flow_dates == date][0] * 60 * 60 * 24 *
                calendar.monthrange(date.year, date.month)[1] / 1000**3
                for date in flow_dates
            ])
        flows.append(flow_values)
        dates.append(flow_dates)

    common_dates = becgis.common_dates(dates)

    data = np.zeros(np.shape(common_dates))
    for flow_values, flow_dates in zip(flows, dates):
        # Pick, for every common date, the first matching value of the series.
        data += np.array([
            flow_values[flow_dates == date][0] for date in common_dates
        ])

    return data, common_dates
def get_ts_from_complete_data_spec(complete_data, lu_fh, keys, a, dates=None):
    """
    Compute weighted, spatially averaged time series for selected datasets.

    Every raster is multiplied by a weight raster before averaging: the
    weight is 0 on pixels whose land-use value belongs to the 'Forests',
    'Rainfed Crops', 'Shrubland' or 'Forest Plantations' entries of
    ``gd.get_sheet4_6_classes()`` and `a` everywhere else.

    Parameters
    ----------
    complete_data : dict
        For every key, ``complete_data[key][0]`` is indexed with a boolean
        array built from ``complete_data[key][1] == date``, so both are
        expected to be ndarrays of equal length (raster paths and their dates).
    lu_fh : str
        Land-use raster; its NaN pixels are excluded from the averages.
    keys : iterable or None
        Keys of `complete_data` to process. ``None`` selects all keys.
    a : float
        Weight applied to pixels outside the four land-use groups above.
    dates : optional
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    tss : dict
        ``{key: (common_dates, values)}`` where `values` is a float64 ndarray
        holding one weighted spatial mean per entry of `common_dates`.
    """
    # Identity test: `keys == None` is evaluated element-wise for ndarrays.
    if keys is None:
        keys = list(complete_data.keys())

    common_dates = becgis.common_dates([complete_data[key][1] for key in keys])
    becgis.assert_proj_res_ndv([complete_data[key][0] for key in keys])

    MASK = becgis.open_as_array(lu_fh, nan_values=True)
    outside_mask = np.isnan(MASK)

    lucs = gd.get_sheet4_6_classes()
    gw_classes = list()
    for subclass in [
            'Forests', 'Rainfed Crops', 'Shrubland', 'Forest Plantations'
    ]:
        gw_classes += lucs[subclass]
    mask_gw = np.logical_or.reduce([MASK == value for value in gw_classes])

    # The weights depend only on the land-use map, so build them once.
    alpha = np.where(mask_gw, 0.0, a)

    tss = dict()

    for key in keys:

        fhs, fh_dates = complete_data[key][0], complete_data[key][1]
        means = []

        for date in common_dates:

            tif = fhs[fh_dates == date][0]

            DATA = becgis.open_as_array(tif, nan_values=True)
            # No-data inside the map counts as zero ...
            DATA[np.isnan(DATA)] = 0.0
            # ... while pixels outside the land-use map are ignored by nanmean.
            DATA[outside_mask] = np.nan

            means.append(np.nanmean(DATA * alpha))

        tss[key] = (common_dates, np.array(means, dtype=np.float64))

    return tss
Example #4
0
def compare_rasters2rasters_per_lu(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates, lu_fh, output_dir, dataset_names = ["DS1", "DS2"], class_dictionary = None, no_of_classes = 6):
    """
    Compare two raster datasets with each other per land-use category.

    For every common date the mean of both datasets is computed per selected
    land-use class and for the whole map; the resulting series are handed to
    ``plot_scatter_series``.

    Parameters
    ----------
    ds1_fhs : ndarray
        Array with strings pointing to maps of dataset 1.
    ds1_dates : ndarray
        Array with same shape as ds1_fhs, containing datetime.date objects.
    ds2_fhs : ndarray
        Array with strings pointing to maps of dataset 2.
    ds2_dates : ndarray
        Array with same shape as ds2_fhs, containing datetime.date objects.
    lu_fh : str
        Pointer to a landusemap.
    output_dir : str
        Map to save results.
    dataset_names : list, optional
        List with two strings describing the names of the two datasets.
    class_dictionary : dict, optional
        Dictionary specifying all the landuse categories. The first element of
        each value is matched against the class values found in `lu_fh`. When
        given, a 'landuse_percentages.png' plot is also written to `output_dir`.
    no_of_classes : int
        The 'no_of_classes' most dominant classes in the lu_fh are compared,
        the rest is ignored.

    Notes
    -----
    The land-use map is restricted to pixels that have data in the first map
    of both datasets. A temporary copy of that restricted map
    (``lu_fh`` with '.tif' replaced by '_.tif') is only written when
    `class_dictionary` is given and is removed again afterwards.
    """
    LUCS = becgis.open_as_array(lu_fh, nan_values = True)
    DS1 = becgis.open_as_array(ds1_fhs[0], nan_values = True)
    DS2 = becgis.open_as_array(ds2_fhs[0], nan_values = True)

    DS1[np.isnan(DS2)] = np.nan
    LUCS[np.isnan(DS1)] = np.nan

    classes, counts = np.unique(LUCS[~np.isnan(LUCS)], return_counts = True)
    # Rank by index instead of by count value: looking classes up through
    # `counts == value` returns the same class twice when two classes happen
    # to cover the same number of pixels.
    dominant = np.argsort(counts, kind = 'stable')[-no_of_classes:]
    selected_lucs = [classes[idx] for idx in dominant]

    common_dates = becgis.common_dates([ds1_dates, ds2_dates])

    # The class masks do not change over time, so build them once.
    class_masks = [(clss, LUCS == clss) for clss in selected_lucs]

    ds1_series = {clss: [] for clss in selected_lucs}
    ds2_series = {clss: [] for clss in selected_lucs}
    ds1_total_list = []
    ds2_total_list = []

    for date in common_dates:

        DS1 = becgis.open_as_array(ds1_fhs[ds1_dates == date][0], nan_values = True)
        DS2 = becgis.open_as_array(ds2_fhs[ds2_dates == date][0], nan_values = True)

        for clss, in_class in class_masks:
            ds1_series[clss].append(np.nanmean(DS1[in_class]))
            ds2_series[clss].append(np.nanmean(DS2[in_class]))

        ds1_total_list.append(np.nanmean(DS1))
        ds2_total_list.append(np.nanmean(DS2))

        print("Finished {0}, going to {1}".format(date, common_dates[-1]))

    DS1_per_class = {clss: np.array(values) for clss, values in ds1_series.items()}
    DS2_per_class = {clss: np.array(values) for clss, values in ds2_series.items()}
    ds1_totals = np.array(ds1_total_list, dtype = np.float64)
    ds2_totals = np.array(ds2_total_list, dtype = np.float64)

    # Map class value -> category name (only needed when a dictionary is given).
    cats = None
    if class_dictionary is not None:
        cats = {v[0]: k for k, v in class_dictionary.items()}

    for clss in selected_lucs:
        label = clss if cats is None else cats[clss]
        plot_scatter_series(DS1_per_class[clss], DS2_per_class[clss], dataset_names[0], dataset_names[1], label, output_dir)

    plot_scatter_series(ds1_totals, ds2_totals, dataset_names[0], dataset_names[1], "Total Area", output_dir)

    if class_dictionary is not None:
        output_fh = os.path.join(output_dir, 'landuse_percentages.png')
        temp_lu_fh = lu_fh.replace('.tif','_.tif')
        driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(lu_fh)
        becgis.create_geotiff(temp_lu_fh, LUCS, driver, NDV, xsize, ysize, GeoT, Projection)
        try:
            becgis.plot_category_areas(temp_lu_fh, class_dictionary, output_fh, area_treshold = 0.01)
        finally:
            # Never leave the temporary land-use map behind.
            os.remove(temp_lu_fh)
Example #5
0
def compare_rasters2rasters(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates, output_dir = None, dataset_names = None, data_treshold = 0.75):
    """
    Compare two series of raster maps by computing the root-mean-square
    error (RMSE) per pixel over all common dates.

    Parameters
    ----------
    ds1_fhs : ndarray
        Array pointing to georeferenced raster files of dataset 1.
    ds1_dates : ndarray
        Array corresponding to ds1_fhs specifying the dates.
    ds2_fhs : ndarray
        Array pointing to georeferenced raster files of dataset 2.
    ds2_dates : ndarray
        Array corresponding to ds2_fhs specifying the dates.
    output_dir : str, optional
        Directory to store the results. When given, one geotiff per result is
        written to the subfolder 'spatial_errors'. When None (default) nothing
        is written to disk.
    dataset_names : list, optional
        List of strings describing the names of the datasets, used in the
        output filenames, e.g. ['CHIRPS', 'ERA-I']. Default is ['DS1', 'DS2'].
    data_treshold : float, optional
        Pixels with data_treshold * total_number_of_samples or fewer actual
        values are set to no-data, i.e. pixels with too few data points are
        ignored.

    Returns
    -------
    results : dict
        Dictionary with the key 'rmse' holding a 2d ndarray with the value
        per pixel.

    Examples
    --------
    >>> results = compare_rasters2rasters(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates,
                                          output_dir = r"C:/Desktop/",
                                          dataset_names = ["CHIRPS", "TRMM"])
    """
    becgis.assert_proj_res_ndv([ds1_fhs, ds2_fhs])

    if dataset_names is None:
        dataset_names = ['DS1','DS2']

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(ds1_fhs[0])

    common_dates = becgis.common_dates([ds1_dates, ds2_dates])

    diff_sum = np.zeros((ysize,xsize))
    non_nans = np.zeros((ysize,xsize))

    samples = len(common_dates)

    for progress, date in enumerate(common_dates, start = 1):

        DS1 = becgis.open_as_array(ds1_fhs[ds1_dates == date][0], nan_values = True)
        DS2 = becgis.open_as_array(ds2_fhs[ds2_dates == date][0], nan_values = True)

        # Only pixels with data in both maps contribute to this date.
        valid = ~(np.isnan(DS1) | np.isnan(DS2))

        non_nans[valid] += 1
        diff_sum[valid] += ((DS1 - DS2)**2)[valid]

        print("progress: {0} of {1} finished".format(progress, samples))

    # Discard pixels that do not have enough valid samples.
    diff_sum[non_nans <= data_treshold*samples] = np.nan
    results = dict()
    results['rmse'] = np.where(non_nans == 0., np.nan, np.sqrt(diff_sum / non_nans))

    # Everything below touches the filesystem, so it only runs when an output
    # directory was actually given (the default is None).
    if output_dir is not None:
        startdate = common_dates[0].strftime('%Y%m%d')
        enddate = common_dates[-1].strftime('%Y%m%d')

        path = os.path.join(output_dir, 'spatial_errors')
        if not os.path.exists(path):
            os.makedirs(path)

        for varname in list(results.keys()):
            fh = os.path.join(path, '{0}_{1}_vs_{2}_{3}_{4}.tif'.format(varname, dataset_names[0], dataset_names[1], startdate, enddate))
            becgis.create_geotiff(fh, results[varname], driver, NDV, xsize, ysize, GeoT, Projection)

    return results
Example #6
0
def compare_rasters2stations(ds1_fhs, ds1_dates, station_dict, output_dir, station_names = None, quantity_unit = None, dataset_names = None, method = 'cubic', min_records = 1):
    """
    Compare a series of raster maps with station time series by computing
    the Pearson-correlation coefficient, RMSE, Nash-Sutcliffe coefficient
    and bias for each station.

    Parameters
    ----------
    ds1_fhs : 1dnarray
        Array containing filehandles to georeferenced raster files.
    ds1_dates : 1dnarray
        Array containing datetime.date or datetime.datetime objects corresponding
        to the filehandles in ds1_fhs. Length should be equal to ds1_fhs.
    station_dict : dictionary
        Dictionary containing coordinates of stations and timeseries. See examples
        below for an example.
    output_dir : str
        Directory to store several results, i.e. (1) a csv file to load in a GIS program,
        (2) interpolated maps showing the various error indicators spatially and (3)
        scatter plots and time series plots for all the stations. The subfolders
        'scatter_plots', 'time_series' and 'interp_errors' are created inside it.
    station_names : dictionary, optional
        Dictionary containing names of the respective stations which can be added to the csv-file, see
        Examples for more information. When None, the station coordinates are used
        as identifier in the csv-file.
    quantity_unit : list, optional
        List of two strings describing the quantity and unit of the data. The list
        itself is not modified.
    dataset_names : list, optional
        List of strings describing the names of the datasets.
    method : str, optional
        Method used for interpolation of the error-indicators, i.e.: 'linear', 'nearest' or 'cubic' (default).
    min_records : int, optional
        Stations with fewer dates in common with the rasters are skipped.

    Returns
    -------
    results : dictionary
        Dictionary containing, per station, the output of ``pairwise_validation``
        followed by the number of samples.

    Examples
    --------

    >>> station_dict = {(lat1, lon1): [(datetime.date(year, month, day), data_value),
                                       (datetime.date(year, month, day), data_value),
                                        etc.],
                        (lat2, lon2): [(datetime.date(year, month, day), data_value),
                                       (datetime.date(year, month, day), data_value),
                                        etc.],
                         etc.}

    >>> station_names = {(lat1,lon1): 'stationname1', (lat2,lon2): 'stationname2', etc.}

    >>> results = compare_rasters2stations(ds1_fhs, ds1_dates, station_dict, output_dir = r"C:/Desktop",
                                station_names = None, quantity_unit = ["P", "mm/month"],
                                dataset_names = ["CHIRPS", "Meteo Stations"],
                                method = 'cubic')
    """
    results = dict()
    pixel_coordinates = list()

    if dataset_names is None:
        dataset_names = ['Spatial', 'Station']
    if quantity_unit is not None:
        # Build a new list: wrapping the unit in place would add another pair
        # of brackets every time the caller reuses the same list.
        quantity_unit = [quantity_unit[0], r'[' + quantity_unit[1] + r']']
    else:
        quantity_unit = ['data', '']

    becgis.assert_proj_res_ndv([ds1_fhs])
    no_of_stations = len(list(station_dict.keys()))
    ds1_dates = becgis.convert_datetime_date(ds1_dates, out = 'datetime')

    # Output folders are prepared up front so they are defined even when every
    # station gets skipped.
    path_scatter = os.path.join(output_dir, 'scatter_plots')
    path_ts = os.path.join(output_dir, 'time_series')
    path_int = os.path.join(output_dir, 'interp_errors')
    for folder in (path_scatter, path_ts, path_int):
        if not os.path.exists(folder):
            os.makedirs(folder)

    for i, station in enumerate(station_dict.keys()):

        station_dates, station_values = unzip(station_dict[station])
        common_dates = becgis.common_dates([ds1_dates, station_dates])
        sample_size = common_dates.size

        if sample_size < min_records:
            print("____station {0} of {1} skipped____ (less than {2} matching records)".format(i+1, no_of_stations, min_records))
            continue

        xpixel, ypixel = pixelcoordinates(station[0], station[1], ds1_fhs[0])

        if np.any([np.isnan(xpixel), np.isnan(ypixel)]):
            print("Skipping station ({0}), cause its not on the map".format(station))
            continue

        # Raster value at the station's pixel for every common date.
        ds1_values = list()
        for date in common_dates:
            ds1_values.append(becgis.open_as_array(ds1_fhs[ds1_dates == date][0], nan_values = True)[ypixel, xpixel])

        common_station_values = [station_values[station_dates == date][0] for date in common_dates]

        results[station] = pairwise_validation(ds1_values, common_station_values)
        results[station] += (sample_size,)

        pixel_coordinates.append((xpixel, ypixel))

        xlabel = '{0} {1} {2}'.format(dataset_names[0], quantity_unit[0], quantity_unit[1])
        ylabel = '{0} {1} {2}'.format(dataset_names[1], quantity_unit[0], quantity_unit[1])
        if station_names is not None:
            title = station_names[station]
            fn = os.path.join(path_scatter,'{0}_vs_{1}.png'.format(station_names[station], dataset_names[0]))
            fnts = os.path.join(path_ts,'{0}_vs_{1}.png'.format(station_names[station], dataset_names[0]))
        else:
            title = station
            fn = os.path.join(path_scatter,'{0}_vs_station_{1}.png'.format(dataset_names[0],i))
            fnts = os.path.join(path_ts,'{0}_vs_station_{1}.png'.format(dataset_names[0],i))
        suptitle = 'pearson: {0:.5f}, rmse: {1:.5f}, ns: {2:.5f}, bias: {3:.5f}, n: {4:.0f}'.format(results[station][0],results[station][1],results[station][2],results[station][3],results[station][4])
        plot_scatter_series(ds1_values, common_station_values, xlabel, ylabel, title, fn, suptitle = suptitle, dates = common_dates)

        xaxis_label = '{0} {1}'.format(quantity_unit[0], quantity_unit[1])
        xlabel = '{0}'.format(dataset_names[0])
        ylabel = '{0}'.format(dataset_names[1])
        plot_time_series(ds1_values,common_station_values,common_dates,xlabel,ylabel,xaxis_label, title, fnts, suptitle = suptitle)

        print("station {0} ({3}) of {1} finished ({2} matching records)".format(i+1, no_of_stations, sample_size, title))

    n = len(results)
    csv_filename = os.path.join(output_dir, '{0}stations_vs_{1}_indicators.csv'.format(n, dataset_names[0]))
    # csv.writer needs a text-mode file opened with newline='' on Python 3.
    with open(csv_filename, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=';')
        writer.writerow(['longitude','latitude','station_id','pearson','rmse','nash_sutcliffe','bias', 'no_of_samples'])
        for station in list(results.keys()):
            # Without station names the coordinates serve as identifier,
            # matching the plot titles above.
            station_id = station_names[station] if station_names is not None else station
            writer.writerow([station[1], station[0], station_id, results[station][0],results[station][1],results[station][2],results[station][3],results[station][4]])

    rslt = {'Relative Bias':list(),'RMSE':list(),'Pearson Coefficient':list(),'Nash-Sutcliffe Coefficient':list(),'Number Of Samples':list()}

    for value in list(results.values()):
        rslt['Relative Bias'].append(value[3])
        rslt['RMSE'].append(value[1])
        rslt['Pearson Coefficient'].append(value[0])
        rslt['Nash-Sutcliffe Coefficient'].append(value[2])
        rslt['Number Of Samples'].append(value[4])

    for key, value in list(rslt.items()):
        title = '{0}'.format(key)
        print(title)
        if key == 'RMSE':
            xlabel = '{0} [mm/month]'.format(key)
        else:
            xlabel = key
        value = np.array(value)
        # Drop NaN and +/-inf before computing statistics and plotting.
        value = value[np.isfinite(value)]
        suptitle = 'mean: {0:.5f}, std: {1:.5f}, n: {2}'.format(np.nanmean(value), np.nanstd(value), n)
        print(value)
        plot_histogram(value, title, xlabel, output_dir, suptitle = suptitle)

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(ds1_fhs[0])
    dummy_map = becgis.open_as_array(ds1_fhs[0])
    grid = np.mgrid[0:ysize, 0:xsize]
    var_names = ['pearson', 'rmse', 'ns', 'bias', 'no_of_samples']

    # Interpolate every indicator between the station pixels and save it as a map.
    for i, var in enumerate(unzip(list(results.values()))):
        xy = np.array(pixel_coordinates)[~np.isnan(var)]
        z = var[~np.isnan(var)]
        interpolation_field = interpolate.griddata(xy, z, (grid[1], grid[0]), method=method, fill_value = np.nanmean(z))
        interpolation_field[dummy_map == NDV] = NDV
        fh = os.path.join(path_int, '{0}_{1}stations_vs_{2}.tif'.format(var_names[i], len(xy), dataset_names[0]))
        becgis.create_geotiff(fh, interpolation_field, driver, NDV, xsize, ysize, GeoT, Projection)

    return results
def _first_fh_containing(fhs, token):
    """Return the first entry of `fhs` that contains `token`; raise IndexError if there is none."""
    for fh in fhs:
        if token in fh:
            return fh
    raise IndexError("no filename containing '{0}' found".format(token))


def create_sheet7(complete_data, metadata, output_dir, global_data, data):
    """
    Create the monthly and yearly sheet 7 csv-files and sheets.

    For every date shared by the 'etb', 'tr' and 'recharge' datasets the
    function sums a set of maps per land-use class with ``lu_type_sum``,
    stores the result in ``<output_dir>/<name>/sheet7/sheet7_monthly`` and
    renders it with ``create_sheet7_svg``. The monthly csv-files are then
    aggregated with ``hl.create_csv_yearly`` and rendered per year.

    Parameters
    ----------
    complete_data : dict
        Per key a pair of ndarrays (filehandles, dates). The keys 'ndm', 'tr',
        'etb', 'etg', 'p', 'bf', 'recharge' and 'rzsm' are read.
    metadata : dict
        The keys 'lu', 'name', 'recycling_ratio' and
        'water_year_start_month' are read.
    output_dir : str
        Base directory for the output.
    global_data : dict
        The keys 'population_tif', 'cattle' and 'root_depth' are read.
    data
        Not used by this function.

    Notes
    -----
    Files belonging to a date are looked up by searching the filenames for
    'YYYYMM' or 'YYYY_MM'; an IndexError is raised when no file matches.
    """
    template_m = get_path('sheet7m_svg')
    template_y = get_path('sheet7y_svg')
    lu_fh = metadata['lu']
    AREA = becgis.map_pixel_area_km(lu_fh)
    output_folder = os.path.join(output_dir, metadata['name'], 'sheet7')
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    monthly_folder = os.path.join(output_folder, "sheet7_monthly")
    recy_ratio = metadata['recycling_ratio']

    date_list2 = becgis.common_dates([
        complete_data['etb'][1], complete_data['tr'][1],
        complete_data['recharge'][1]
    ])
    date_list = becgis.convert_datetime_date(date_list2, out='datetime')

    live_feed, feed_dict, abv_grnd_biomass_ratio, fuel_dict, sheet7_lulc_classes, c_fractions = get_sheet7_classes(
    )
    # year:fish production
    #avg 2003-2012: 375375 2013:528000 2014:505005 <http://www.fao.org/3/a-i5555e.pdf>
    #http://www.fao.org/fishery/statistics/global-production/en
    #    fish_production = {'2000':245600,
    #                       '2001':385000,
    #                       '2002':360300,
    #                       '2003':308750,
    #                       '2004':250000,
    #                       '2005':324000,
    #                       '2006':422000,
    #                       '2007':395000,
    #                       '2008':365000,
    #                       '2009':390000}

    # Select and project population to LULC map
    # (the population map is looked up but currently not used any further)
    pop_fh = global_data['population_tif']
    #    pop_temp = os.path.join(output_folder, 'temp_pop')
    #    pop_fh = becgis.match_proj_res_ndv(lu_fh, pop_fh, pop_temp)

    # Select and project cattle to LULC map
    cattle_fh = [global_data["cattle"]]
    cattle_temp = os.path.join(output_folder, 'temp_cattle')
    cattle_fh = becgis.match_proj_res_ndv(lu_fh, cattle_fh, cattle_temp)[0]

    # Collect, per dataset, the files belonging to the common dates.
    ndm_fhs = []
    ro_fhs = []
    et_blue_fhs = []
    et_green_fhs = []
    p_fhs = []
    dry_bf_fhs = []
    gw_rchg_fhs = []
    for d in date_list2:
        ndm_fhs.extend(complete_data['ndm'][0][complete_data['ndm'][1] == d])
        ro_fhs.extend(complete_data['tr'][0][complete_data['tr'][1] == d])
        et_blue_fhs.extend(
            complete_data['etb'][0][complete_data['etb'][1] == d])
        et_green_fhs.extend(
            complete_data['etg'][0][complete_data['etg'][1] == d])
        p_fhs.extend(complete_data['p'][0][complete_data['p'][1] == d])
        dry_bf_fhs.extend(complete_data['bf'][0][complete_data['bf'][1] == d])
        gw_rchg_fhs.extend(
            complete_data['recharge'][0][complete_data['recharge'][1] == d])

    # Make fraction maps to split feed and fuel yields in landscape and incremental ET
    fraction_fhs = split_yield(output_folder,
                               p_fhs,
                               et_blue_fhs,
                               et_green_fhs,
                               ab=(1.0, 1.0))

    # calculate feed production and return filehandles of saved tif files
    feed_fhs_landscape, feed_fhs_incremental = livestock_feed(
        output_folder, lu_fh, AREA, ndm_fhs, feed_dict, live_feed, cattle_fh,
        fraction_fhs, date_list2)

    # calculate fuel production and return filehandles of saved tif files
    fuel_fhs_landscape, fuel_fhs_incremental = fuel_wood(
        output_folder, lu_fh, AREA, ndm_fhs, fraction_fhs, date_list2)

    # calculate root_storage and return filehandles of saved tif files
    rz_depth_fh = global_data['root_depth']
    rz_depth_tif = becgis.match_proj_res_ndv(lu_fh, np.array([rz_depth_fh]),
                                             tf.mkdtemp())[0]
    rz_sm_fhs = complete_data['rzsm'][0]

    root_storage_fhs = root_zone_storage_Wpx(output_folder, rz_sm_fhs,
                                             rz_depth_tif)

    atm_recy_landscape_fhs = recycle(output_folder, et_green_fhs, recy_ratio,
                                     lu_fh, 'landscape')
    atm_recy_incremental_fhs = recycle(output_folder, et_blue_fhs, recy_ratio,
                                       lu_fh, 'incremental')

    class Vividict(dict):
        # Nested dictionary that creates missing levels on first access.
        def __missing__(self, key):
            value = self[key] = type(self)()
            return value

    results = Vividict()
    for d in date_list:
        datestr1 = "%04d_%02d" % (d.year, d.month)
        datestr2 = "%04d%02d" % (d.year, d.month)
        ystr = "%04d" % (d.year)
        mstr = "%02d" % (d.month)

        # Feed and fuel maps carry 'YYYY_MM' in their name, the others 'YYYYMM'.
        ro_fh = _first_fh_containing(ro_fhs, datestr2)
        feed_fh_landscape = _first_fh_containing(feed_fhs_landscape, datestr1)
        feed_fh_incremental = _first_fh_containing(feed_fhs_incremental,
                                                   datestr1)
        fuel_fh_landscape = _first_fh_containing(fuel_fhs_landscape, datestr1)
        fuel_fh_incremental = _first_fh_containing(fuel_fhs_incremental,
                                                   datestr1)

        baseflow_fh = _first_fh_containing(dry_bf_fhs, datestr2)
        gw_recharge_fh = _first_fh_containing(gw_rchg_fhs, datestr2)

        root_storage_fh = _first_fh_containing(root_storage_fhs, datestr2)
        atm_recy_landscape_fh = _first_fh_containing(atm_recy_landscape_fhs,
                                                     datestr2)
        atm_recy_incremental_fh = _first_fh_containing(
            atm_recy_incremental_fhs, datestr2)

        month_results = results[ystr][mstr]

        month_results['tot_runoff'] = lu_type_sum(ro_fh,
                                                  lu_fh,
                                                  AREA,
                                                  sheet7_lulc_classes,
                                                  convert='mm_to_km3')
        #  results['fish'] =
        month_results['feed_incremental'] = lu_type_sum(
            feed_fh_incremental, lu_fh, AREA, sheet7_lulc_classes)
        month_results['feed_landscape'] = lu_type_sum(
            feed_fh_landscape, lu_fh, AREA, sheet7_lulc_classes)
        month_results['fuel_incremental'] = lu_type_sum(
            fuel_fh_incremental, lu_fh, AREA, sheet7_lulc_classes)
        month_results['fuel_landscape'] = lu_type_sum(
            fuel_fh_landscape, lu_fh, AREA, sheet7_lulc_classes)

        month_results['baseflow'] = lu_type_sum(baseflow_fh,
                                                lu_fh,
                                                AREA,
                                                sheet7_lulc_classes,
                                                convert='mm_to_km3')
        month_results['gw_rech'] = lu_type_sum(gw_recharge_fh,
                                               lu_fh,
                                               AREA,
                                               sheet7_lulc_classes,
                                               convert='mm_to_km3')
        month_results['root_storage'] = lu_type_sum(root_storage_fh,
                                                    lu_fh,
                                                    AREA,
                                                    sheet7_lulc_classes,
                                                    convert='mm_to_km3')
        month_results['atm_recycl_landscape'] = lu_type_sum(
            atm_recy_landscape_fh,
            lu_fh,
            AREA,
            sheet7_lulc_classes,
            convert='mm_to_km3')
        month_results['atm_recycl_incremental'] = lu_type_sum(
            atm_recy_incremental_fh,
            lu_fh,
            AREA,
            sheet7_lulc_classes,
            convert='mm_to_km3')

        # Build the paths with os.path.join so they also work on systems
        # where the separator is not a backslash, and so they end up in the
        # same folder that is handed to hl.create_csv_yearly below.
        output_fh = os.path.join(monthly_folder,
                                 "sheet7_" + datestr1 + ".csv")
        create_csv(month_results, output_fh)
        output = os.path.join(monthly_folder, 'sheet7_' + datestr1 + '.pdf')
        create_sheet7_svg(metadata['name'],
                          datestr1,
                          output_fh,
                          output,
                          template=template_m)

    fhs = hl.create_csv_yearly(monthly_folder,
                               os.path.join(output_folder, "sheet7_yearly"),
                               7,
                               metadata['water_year_start_month'],
                               year_position=[-11, -7],
                               month_position=[-6, -4],
                               header_rows=1,
                               header_columns=3,
                               minus_header_colums=-1)
    for csv_fh in fhs:
        # The year is taken from the four characters before the extension.
        year = csv_fh[-8:-4]
        create_sheet7_svg(metadata['name'],
                          year,
                          csv_fh,
                          csv_fh.replace('.csv', '.pdf'),
                          template=template_y)
def create_sheet1(complete_data, metadata, output_dir, global_data):
    """
    Create the monthly and yearly sheet 1 csv-files and sheets.

    In- and outflow series are derived from the monthly sheet 5 output found
    in ``<output_dir>/<name>/sheet5/sheet5_monthly``. For every common date
    the sheet values are computed with ``calc_sheet1``, stored as csv in
    ``<output_dir>/<name>/sheet1/sheet1_monthly`` and rendered with
    ``create_sheet1_png``. Afterwards two graphs are plotted and the monthly
    csv-files are aggregated with ``hl.create_csv_yearly``.

    Parameters
    ----------
    complete_data : dict
        Per key a pair of ndarrays (filehandles, dates). The keys 'p', 'etb',
        'etg', 'fractions' and (when there is no inflow file) 'tr' are read.
    metadata : dict
        The keys 'name', 'lu', 'recycling_ratio' and
        'water_year_start_month' are read.
    output_dir : str
        Base directory for the output.
    global_data : dict
        The keys 'wpl_tif' and 'environ_water_req' are read.

    Returns
    -------
    complete_data : dict
        The unchanged input dictionary.
    all_results : list
        The output of ``calc_sheet1`` for every common date.
    """
    output_folder = os.path.join(output_dir, metadata['name'], 'sheet1')
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Defined up front so the yearly aggregation below does not depend on the
    # monthly loop having run at least once.
    monthly_folder = os.path.join(output_folder, 'sheet1_monthly')
    sheet5_monthly = os.path.join(output_dir, metadata['name'], "sheet5",
                                  "sheet5_monthly")

    output_fh_in, output_fh_out = create_sheet1_in_outflows(
        sheet5_monthly, metadata, output_folder)
    outflow_values, outflow_dates = sum_ts(np.array([output_fh_out]))
    transfer_values, transfer_dates = get_transfers(sheet5_monthly)
    transfer_values = np.array(transfer_values)
    transfer_dates = np.array(transfer_dates)

    if output_fh_in:
        inflow_values, inflow_dates = sum_ts(np.array([output_fh_in]))

    # Calculate the average longterm outflow.
    q_out_avg = np.nanmean(outflow_values)

    # Open a dictionary specifying the landuseclasses.
    sheet1_lucs = gd.get_sheet1_classes()

    # Determine for what dates all the required data is available.
    if output_fh_in:
        common_dates = becgis.common_dates([
            complete_data['p'][1], complete_data['etb'][1],
            complete_data['etg'][1], outflow_dates, inflow_dates
        ])
    else:
        # NOTE(review): outflow_dates is deliberately left out here in the
        # original code, so a date without outflow raises an IndexError below.
        common_dates = becgis.common_dates([
            complete_data['tr'][1], complete_data['p'][1],
            complete_data['etb'][1], complete_data['etg'][1]
        ])  #, outflow_dates])

    # Create list to store results.
    all_results = list()

    for date in common_dates:
        # Summarize some data in a dictionary.
        entries = {
            'Fractions':
            complete_data['fractions'][0][complete_data['fractions'][1] ==
                                          date][0],
            'WPL':
            global_data["wpl_tif"],
            'EWR':
            global_data["environ_water_req"],
            'P':
            complete_data['p'][0][complete_data['p'][1] == date][0],
            'ETblue':
            complete_data['etb'][0][complete_data['etb'][1] == date][0],
            'ETgreen':
            complete_data['etg'][0][complete_data['etg'][1] == date][0]
        }

        # Select the required outflow value.
        q_outflow = outflow_values[outflow_dates == date][0]
        q_transfer = transfer_values[transfer_dates == date][0]
        if output_fh_in:
            q_inflow = inflow_values[inflow_dates == date][0]
        else:
            q_inflow = 0.0

        # Calculate the sheet values.
        results = calc_sheet1(entries,
                              metadata['lu'],
                              sheet1_lucs,
                              metadata['recycling_ratio'],
                              q_outflow,
                              q_out_avg,
                              output_folder,
                              q_in_sw=q_inflow,
                              q_out_sw=q_transfer)

        # Save the results of the current month.
        all_results.append(results)

        # Create the csv-file.
        output_fh = os.path.join(
            monthly_folder,
            'sheet1_{0}_{1}.csv'.format(date.year,
                                        str(date.month).zfill(2)))
        create_csv(results, output_fh)

        # Plot the actual sheet.
        create_sheet1_png(metadata['name'],
                          '{0}-{1}'.format(date.year,
                                           str(date.month).zfill(2)),
                          'km3/month',
                          output_fh,
                          output_fh.replace('.csv', '.pdf'),
                          template=get_path('sheet1_svg'),
                          smart_unit=True)

    # Create some graphs.
    plot_storages(all_results, common_dates, metadata['name'], output_folder)
    plot_parameter(all_results, common_dates, metadata['name'], output_folder,
                   'utilizable_outflow')

    # Create yearly csv-files.
    yearly_csv_fhs = hl.create_csv_yearly(monthly_folder,
                                          os.path.join(output_folder,
                                                       "sheet1_yearly"),
                                          1,
                                          metadata['water_year_start_month'],
                                          year_position=[-11, -7],
                                          month_position=[-6, -4],
                                          header_rows=1,
                                          header_columns=3)

    # Plot yearly sheets.
    for csv_fh in yearly_csv_fhs:
        create_sheet1_png(metadata['name'],
                          csv_fh[-8:-4],
                          'km3/year',
                          csv_fh,
                          csv_fh.replace('.csv', '.pdf'),
                          template=get_path('sheet1_svg'),
                          smart_unit=True)

    return complete_data, all_results
def diagnosis_wp(metadata, complete_data, output_dir, waterpix):
    """Plot a basin-scale water-balance diagnosis from P, ET and runoff maps.

    For every date shared by the 'p', 'et', 'tr' and 'etb' series, the
    precipitation, evapotranspiration and runoff ('tr') rasters are opened,
    masked to the pixels where the land-use map has data, and reduced to a
    basin total (pixel value * 1e-6 * pixel area) and a basin mean. The
    per-date balance is P - ET - RO. The results are drawn on matplotlib
    figure 1 (cumulative stocks plus storage relative to mean P) and
    figure 2 (mean series plus cumulative mean balance).

    Parameters
    ----------
    metadata : dict
        Must provide 'lu' (land-use raster, passed to
        ``becgis.open_as_array`` and ``becgis.map_pixel_area_km``) and
        'name' (used in the plot title).
    complete_data : dict
        Must provide the keys 'p', 'et', 'tr' and 'etb'. Each value is
        indexed as ``[0]`` (raster files) and ``[1]`` (dates) and is
        boolean-masked by date, so both are presumably numpy arrays of
        equal length -- TODO confirm against the caller.
    output_dir : str
        Directory that is created when it does not exist yet. The
        ``savefig`` call that would write into it is commented out.
    waterpix
        Unused here; the only code referring to it is commented out.

    Notes
    -----
    The variable names and axis labels suggest the rasters are in mm and
    the totals in km3 (mm * 1e-6 * km2) -- units are not verified here.
    """
    output_dir = os.path.join(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    LU = becgis.open_as_array(metadata['lu'], nan_values=True)
    # Pixels without land-use data are excluded from every sum and mean.
    outside_lu = np.isnan(LU)

    # NOTE: the supply step that consumed `waterpix` is disabled.
    #    S = SortWaterPix(waterpix, 'Supply_M', output_dir)
    #    becgis.match_proj_res_ndv(metadata['lu'], becgis.list_files_in_folder(S), os.path.join(output_dir, "s_matched"))
    #    complete_data['supply'] = becgis.sort_files(os.path.join(output_dir, "s_matched"), [-10,-6], month_position = [-6,-4])[0:2]

    # 'etb' only restricts the set of dates; its rasters are not read below.
    common_dates = becgis.common_dates([
        complete_data['p'][1], complete_data['et'][1], complete_data['tr'][1],
        complete_data['etb'][1]
    ])

    becgis.assert_proj_res_ndv([
        complete_data['p'][0], complete_data['et'][0], complete_data['tr'][0]
    ])

    area = becgis.map_pixel_area_km(metadata['lu'])
    # Loop-invariant conversion factor applied to every raster.
    factor = 0.001 * 0.001 * area

    # Collect per-date values in lists; converted to arrays after the loop.
    balance_km3_values, p_km3_values = [], []
    et_km3_values, ro_km3_values = [], []
    balance_mm_values, p_mm_values = [], []
    et_mm_values, ro_mm_values = [], []

    for date in common_dates:

        print(date)

        P = complete_data['p'][0][complete_data['p'][1] == date][0]
        ET = complete_data['et'][0][complete_data['et'][1] == date][0]
        RO = complete_data['tr'][0][complete_data['tr'][1] == date][0]

        p = becgis.open_as_array(P, nan_values=True)
        et = becgis.open_as_array(ET, nan_values=True)
        ro = becgis.open_as_array(RO, nan_values=True)

        p[outside_lu] = et[outside_lu] = ro[outside_lu] = np.nan

        # Each reduction is computed once and reused for the balance.
        p_total = np.nansum(p * factor)
        et_total = np.nansum(et * factor)
        ro_total = np.nansum(ro * factor)

        p_mean = np.nanmean(p)
        et_mean = np.nanmean(et)
        ro_mean = np.nanmean(ro)

        balance_km3_values.append(p_total - et_total - ro_total)
        p_km3_values.append(p_total)
        et_km3_values.append(et_total)
        ro_km3_values.append(ro_total)

        balance_mm_values.append(p_mean - et_mean - ro_mean)
        p_mm_values.append(p_mean)
        et_mm_values.append(et_mean)
        ro_mm_values.append(ro_mean)

    balance_km3 = np.array(balance_km3_values, dtype=float)
    p_km3 = np.array(p_km3_values, dtype=float)
    et_km3 = np.array(et_km3_values, dtype=float)
    ro_km3 = np.array(ro_km3_values, dtype=float)

    balance_mm = np.array(balance_mm_values, dtype=float)
    p_mm = np.array(p_mm_values, dtype=float)
    et_mm = np.array(et_mm_values, dtype=float)
    ro_mm = np.array(ro_mm_values, dtype=float)

    # Cumulative storage change expressed in multiples of the mean P total.
    relative_storage = np.cumsum(balance_km3) / np.mean(p_km3)

    ##
    # BASIC BASINSCALE WATERBALANCE (PRE-SHEETS)
    ##
    fig = plt.figure(1, figsize=(9, 6))
    plt.clf()
    fig.patch.set_alpha(0.7)

    ax2 = plt.gca()
    ax = ax2.twinx()

    ax2.bar(common_dates, relative_storage, width=25, color='#3ee871')

    # The visibility flag is passed positionally because its keyword was
    # renamed from `b` to `visible`; `which` must be lowercase.
    ax2.grid(True, which='major', color='0.65', linestyle='--', zorder=0)
    # Zero-height bar on `ax` so the bars drawn on `ax2` get a legend entry.
    ax.bar([common_dates[0]], [0],
           label=r'$\sum dS / \overline{P}$',
           color='#3ee871')
    ax.plot(common_dates, np.cumsum(balance_km3), label=r'$\sum dS$')
    ax.plot(common_dates, np.cumsum(p_km3), label=r'$\sum (P)$')
    ax.plot(common_dates,
            np.cumsum(et_km3) + np.cumsum(ro_km3),
            label=r'$\sum (ET + RO)$')

    # Shrink both axes vertically to make room for the legend underneath.
    box = ax.get_position()
    ax.set_position(
        [box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9])
    ax2.set_position(
        [box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9])

    ax.legend(loc='upper center',
              bbox_to_anchor=(0.5, -0.1),
              fancybox=True,
              shadow=True,
              ncol=5)

    plt.suptitle(
        r'$\sum P = {0:.1f}\;{4}, \ \sum ET = {1:.1f}\;{4}, \sum RO = {2:.1f}\;{4}, \sum dS = {3:.1f}\;{4}$'
        .format(np.sum(p_km3), np.sum(et_km3), np.sum(ro_km3),
                np.sum(balance_km3), r"km^{3}"))
    # Positional argument 1 (the balance sum) is not referenced by the
    # template; it is kept so the remaining indices stay unchanged.
    plt.title(
        r'{0}, ${5} = {2:.3f}\;{6}, {7} = {3:.3f}, dt = {4}\;months$'.format(
            metadata['name'], np.sum(balance_km3), np.mean(balance_km3),
            np.mean(relative_storage), len(p_km3), r"\overline{dS}", r"km^{3}",
            r"\overline{\sum dS / \overline{P}}"))
    plt.xlabel('Time')

    ax2.set_ylabel(r'Relative Storage [months of $\overline{P}$]')
    ax.set_ylabel('Stock [$km^{3}$]')
    # Saving the figure is currently disabled.
    #plt.savefig(os.path.join(output_dir, 'balance_{0}'.format(metadata['name'])))

    fig = plt.figure(2)
    plt.clf()

    ax2 = plt.gca()
    ax = ax2.twinx()

    # Mean P, ET and RO on the left axis, cumulative mean balance on the right.
    ax2.plot(common_dates, p_mm, common_dates, et_mm, common_dates, ro_mm)
    ax.plot(common_dates, np.cumsum(balance_mm), 'k')