Example #1
def loadData(design):
    param_bounds, param_names, params_no, problem = setupProblem(design)
    samples, rows_to_keep = getSamples(design, params_no, param_bounds)
    df = pd.DataFrame({'mu0': samples[:, 0], 'sigma0': samples[:, 1],
                       'mu1': samples[:, 2], 'sigma1': samples[:, 3],
                       'p00': samples[:, 4], 'p11': samples[:, 5]})

    return df
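setupProblem and getSamples are project helpers not shown in this listing. A minimal sketch of what getSamples plausibly does, with the file path and layout as assumptions (one row per sample, one column per parameter):

import numpy as np

def getSamples(design, params_no, param_bounds):
    # hypothetical sample file for the design; path and layout are assumptions
    samples = np.loadtxt('../Qgen/' + design + '.txt')[:, :params_no]
    # keep only rows where every parameter lies within its [lower, upper] bounds
    rows_to_keep = [i for i in range(len(samples))
                    if all(param_bounds[j][0] <= samples[i, j] <= param_bounds[j][1]
                           for j in range(params_no))]
    return samples[rows_to_keep, :], rows_to_keep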
Example #2
def main():
    if len(argv) != 8:
        print(
            'Usage: FsPlusRpPlusGfs NormalFilePath TumorFilePath TargetDimensionalityFS TargetDimensionalityRP TargetDimensionalityGFS NumberOfNormalSamples NumberOfTumorSamples'
        )
        exit()
    """Parameters for executing the script"""
    # File Path of Normal People's Data
    normalFilePath = argv[1]
    # File Path of Tumor People's Data
    tumorFilePath = argv[2]
    # The Dimensionality of Subspace after Feature Selection Stage
    targetDimensionalityFS = int(argv[3])
    # The Dimensionality of Subspace after Random Projection Stage
    targetDimensionalityRP = int(argv[4])
    # The Dimensionality of Subspace after Greedy Feature Selection Stage
    targetDimensionalityGFS = int(argv[5])
    # Number of Testing Samples for Normal People
    numberOfNormalTestingSamples = int(argv[6])
    # Number of Testing Samples for Tumor People
    numberOfTumorTestingSamples = int(argv[7])
    """Read data from files"""
    # Samples of Normal People
    normalSamples = getSamples(normalFilePath)
    numberOfNormalSamples = len(normalSamples)
    # Samples of Tumor People
    tumorSamples = getSamples(tumorFilePath)
    numberOfTumorSamples = len(tumorSamples)
    # All Samples
    samples = normalSamples + tumorSamples
    numberOfSamples = len(samples)
    # Sample Indexes in List: samples
    normalSampleIndexes = range(0, numberOfNormalSamples)
    tumorSampleIndexes = range(numberOfNormalSamples, numberOfSamples)

    numberOfGenes = len(samples[0]) if len(samples) != 0 else 0
    print('Original Data Matrix: {} Samples with {} Genes'.format(
        numberOfSamples, numberOfGenes))
    """Data preprocessing"""
    zScoreNormalization(samples)
    """Runtime Result"""
    trainingErrorSamples, tp, fp, fn, tn = \
        featureSelectionPlusRandomProjectionPlusGreedyFeatureSelectionProceduce(
            samples, normalSampleIndexes, tumorSampleIndexes, numberOfGenes, targetDimensionalityFS,
            targetDimensionalityRP, targetDimensionalityGFS, numberOfNormalTestingSamples, numberOfTumorTestingSamples)
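Note that this example's getSamples takes only a file path, unlike Example #1's, and that zScoreNormalization must mutate the sample list in place since its return value is ignored. A minimal sketch of a compatible zScoreNormalization, assuming samples is a list of equal-length numeric lists:

import statistics

def zScoreNormalization(samples):
    """Normalize each gene (column) across samples to zero mean and unit variance, in place."""
    if not samples:
        return
    for g in range(len(samples[0])):
        column = [sample[g] for sample in samples]
        mean = statistics.mean(column)
        std = statistics.pstdev(column)
        for sample in samples:
            # leave constant genes at zero to avoid division by zero
            sample[g] = (sample[g] - mean) / std if std > 0 else 0.0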
Example #3
def makeFigure8_ResponseSurfaces():

    sns.set_style("white")

    # constants, vectors
    design = 'LHsamples_wider_1000_AnnQonly'
    structure = '53_ADC022'
    short_idx = np.arange(2, 22, 2)
    demand_idx = np.arange(1, 21, 2)
    percentiles = [50, 90]
    nrealizations = 10
    nyears = 105
    nmonths = 12

    # plotting characteristics
    shortage_cmap = mpl.cm.get_cmap('RdBu_r')
    colors = [
        '#de2d26', '#fb6a4a', '#3182bd', '#6baed6', '#a50f15', '#08519c',
        '#9e9ac8'
    ]

    # find which samples are still in param_bounds after flipping misidentified wet and dry states
    param_bounds, param_names, params_no, problem = setupProblem(design)
    samples, rows_to_keep = getSamples(design, params_no, param_bounds)
    nsamples = len(rows_to_keep)

    # load historical shortage data and convert acre-ft to m^3
    hist_short = np.loadtxt('../Simulation_outputs/' + structure +
                            '_info_hist.txt')[:, 2] * 1233.48
    hist_demand = np.loadtxt('../Simulation_outputs/' + structure +
                             '_info_hist.txt')[:, 1] * 1233.48
    # replace failed runs with np.nan (currently -999.9)
    hist_short[hist_short < 0] = np.nan

    # load shortage data for this experimental design
    SYN = np.load('../Simulation_outputs/' + design + '/' + structure +
                  '_info.npy')
    # extract columns for year shortage and demand and convert acre-ft to m^3
    SYN_short = SYN[:, short_idx, :] * 1233.48
    SYN_demand = SYN[:, demand_idx, :] * 1233.48
    # use just the samples within the experimental design
    SYN_short = SYN_short[:, :, rows_to_keep]
    SYN_demand = SYN_demand[:, :, rows_to_keep]
    # replace failed runs with np.nan (currently -999.9)
    SYN_short[SYN_short < 0] = np.nan
    # Identify droughts at percentiles
    syn_magnitude = calc_syn_magnitude(nyears, nmonths, nrealizations,
                                       nsamples, percentiles, SYN_short)
    # reshape synthetic shortage data into 12*nyears x nsamples*nrealizations
    SYN_short = SYN_short.reshape([
        np.shape(SYN_short)[0],
        np.shape(SYN_short)[1] * np.shape(SYN_short)[2]
    ])
    SYN_demand = SYN_demand.reshape([
        np.shape(SYN_demand)[0],
        np.shape(SYN_demand)[1] * np.shape(SYN_demand)[2]
    ])

    # create data frames of shortage and SOWs
    CMIPsamples = np.loadtxt('../Qgen/CMIPunscaled_SOWs.txt')[:, 7:13]
    PaleoSamples = np.loadtxt('../Qgen/Paleo_SOWs.txt')[:, 7:13]
    CMIP = pd.DataFrame(data=np.repeat(CMIPsamples, nrealizations, axis=0),
                        columns=param_names)
    Paleo = pd.DataFrame(data=np.repeat(PaleoSamples, nrealizations, axis=0),
                         columns=param_names)
    dta = pd.DataFrame(data=np.repeat(samples, nrealizations, axis=0),
                       columns=param_names)
    R2_scores = pd.read_csv('../Simulation_outputs/' + design + '/' +
                            structure + '_R2.csv')

    fig, axes = plt.subplots(2, 3, figsize=(19.2, 9.5))
    fig.subplots_adjust(hspace=0.3, wspace=0.3)
    # plot shortage distribution for this structure under all-encompassing experiment
    ax1 = axes[0, 0]
    handles, labels = plotSDC(ax1, SYN_short, SYN_demand, hist_short,
                              hist_demand, nsamples, nrealizations)
    ax1.set_ylim([0, 6200000])
    ax1.ticklabel_format(style='sci', axis='y', scilimits=(6, 6))
    ax1.tick_params(axis='both', labelsize=14)
    ax1.set_ylabel('Shortage (m' + r'$^3$' + ')', fontsize=14)
    # add lines at percentiles
    for percentile in percentiles:
        ax1.plot([percentile, percentile], [0, 6200000], c='k')

    # plot variance decomposition for this structure under all-encompassing experiment
    ax2 = axes[1, 0]
    S1_values = pd.read_csv('../Simulation_outputs/' + design + '/' +
                            structure + '_S1.csv')
    plotSums(S1_values, ax2, colors)
    ax2.set_ylim([0, 1])
    ax2.tick_params(axis='both', labelsize=14)
    ax2.set_ylabel('Portion of Variance', fontsize=14)
    ax2.set_xlabel('Shortage Percentile', fontsize=14)
    # add lines at percentiles
    for percentile in percentiles:
        ax2.plot([percentile, percentile], [0, 1], c='k')

    for i in range(len(percentiles)):
        # get shortage magnitudes at this percentile
        dta['Shortage'] = syn_magnitude[i, :]
        # find average shortage across realizations in each SOW
        avg_dta = dta.groupby(['mu0', 'mu1', 'sigma0', 'sigma1', 'p00', 'p11'],
                              as_index=False)[['Shortage']].mean()
        percentile_scores = R2_scores[str(int(percentiles[i] - 1))]
        if percentile_scores[0] > 0:
            # get top two predictors of shortage
            top_two = list(np.argsort(percentile_scores)[::-1][:2])
            predictors = [param_names[top_two[0]], param_names[top_two[1]]]
            avg_dta['Interaction'] = avg_dta[predictors[0]] * avg_dta[predictors[1]]
            # fit OLS model with top two predictors and their interaction
            result = fitOLS_interact(avg_dta, predictors)
            xstep = np.around((param_bounds[top_two[0]][1] - param_bounds[top_two[0]][0]) / 100, decimals=4)
            ystep = np.around((param_bounds[top_two[1]][1] - param_bounds[top_two[1]][0]) / 100, decimals=4)
            xgrid = np.arange(param_bounds[top_two[0]][0], param_bounds[top_two[0]][1], xstep)
            ygrid = np.arange(param_bounds[top_two[1]][0], param_bounds[top_two[1]][1], ystep)

            # plot average shortage in each SOW and prediction from regression
            plotResponseSurface(axes[0, i + 1], result, avg_dta, CMIP, Paleo,
                                shortage_cmap, shortage_cmap, xgrid, ygrid,
                                predictors[0], predictors[1], otherSOWs=False)
            axes[0, i + 1].set_title(str(percentiles[i]) + 'th Percentile',
                                     fontsize=16)
            fig.savefig('Figure8_ResponseSurfaces.pdf')

            # plot prediction from regression with CMIP and Paleo samples on top
            plotResponseSurface(axes[1, i + 1], result, avg_dta, CMIP, Paleo,
                                shortage_cmap, shortage_cmap, xgrid, ygrid,
                                predictors[0], predictors[1], otherSOWs=True)
            fig.savefig('Figure8_ResponseSurfaces.pdf')

    fig.savefig('Figure8_ResponseSurfaces.pdf')
    fig.clf()

    return None
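fitOLS_interact is an external helper; per the comment above, it fits an ordinary least squares model of shortage on the top two predictors plus their interaction. A plausible sketch with statsmodels, assuming the 'Shortage' and 'Interaction' column names used above:

import statsmodels.api as sm

def fitOLS_interact(dta, predictors):
    # regress shortage on the two predictors, their interaction, and an intercept
    X = sm.add_constant(dta[predictors + ['Interaction']])
    return sm.OLS(dta['Shortage'], X).fit()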
Example #4
def makeFigureS18_FactorMaps_User3():

    sns.set_style("white")
    
    # constants, vectors
    design = 'LHsamples_wider_1000_AnnQonly'
    structure = '3704614'
    short_idx = np.arange(2,22,2)
    demand_idx = np.arange(1,21,2)
    percentiles = [40, 90]
    nrealizations = 10
    
    # plotting characteristics
    probability_cmap = mpl.cm.get_cmap('RdBu')
    success_cmap = mpl.colors.ListedColormap(np.array([[227,26,28],[166,206,227]])/255.0)
    contour_levels = np.arange(0.0, 1.05,0.1)
              
    # find which samples are still in param_bounds after flipping misidentified wet and dry states
    param_bounds, param_names, params_no, problem = setupProblem(design)
    samples, rows_to_keep = getSamples(design, params_no, param_bounds)
    nsamples = len(rows_to_keep)
    
    # load historical shortage data and convert acre-ft to m^3
    hist_short = np.loadtxt('../Simulation_outputs/' + structure + '_info_hist.txt')[:,2]*1233.48
    hist_demand = np.loadtxt('../Simulation_outputs/' + structure + '_info_hist.txt')[:,1]*1233.48
    # replace failed runs with np.nan (currently -999.9)
    hist_short[hist_short < 0] = np.nan
    
    # load shortage data for this experimental design
    SYN = np.load('../Simulation_outputs/' + design + '/' + structure + '_info.npy')
    # extract columns for year shortage and demand and convert acre-ft to m^3
    SYN_short = SYN[:,short_idx,:]*1233.48
    SYN_demand = SYN[:,demand_idx,:]*1233.48
    # use just the samples within the experimental design
    SYN_short = SYN_short[:,:,rows_to_keep]
    SYN_demand = SYN_demand[:,:,rows_to_keep]
    # replace failed runs with np.nan (currently -999.9)
    SYN_short[SYN_short < 0] = np.nan
    # reshape synthetic shortage data into 12*nyears x nsamples*nrealizations
    SYN_short = SYN_short.reshape([np.shape(SYN_short)[0],np.shape(SYN_short)[1]*np.shape(SYN_short)[2]])
    SYN_demand = SYN_demand.reshape([np.shape(SYN_demand)[0],np.shape(SYN_demand)[1]*np.shape(SYN_demand)[2]])
    
    # create data frames of shortage and SOWs
    dta = pd.DataFrame(data = np.repeat(samples, nrealizations, axis = 0), columns=param_names)
    
    
    fig, axes = plt.subplots(2,4,figsize=(24.3,9.1))
    fig.subplots_adjust(hspace=0.5,right=0.8,wspace=0.5)        
    # plot shortage distribution for this structure under all-encompassing experiment
    ax1 = axes[0,0]
    handles, labels = plotSDC(ax1, SYN_short, SYN_demand, hist_short, hist_demand, nsamples, nrealizations, True)
    ax1.set_ylim([0,1])
    ax1.tick_params(axis='both',labelsize=14)
    ax1.set_ylabel('Shortage/Demand',fontsize=14)
    ax1.set_xlabel('Shortage Percentile',fontsize=14)
    # add lines at percentiles
    for percentile in percentiles:
        ax1.plot([percentile, percentile],[0,1],c='k')
    
    # plot failure heatmap for this structure under all-encompassing experiment
    ax2 = axes[1,0]
    allSOWs, historic_percents, frequencies, magnitudes, gridcells, im = plotFailureHeatmap(ax2, design, structure)
    addPercentileBlocks(historic_percents, gridcells, percentiles, ax2)
    allSOWsperformance = allSOWs/100
    historic_percents = [roundup(x) for x in historic_percents]
    #all_pseudo_r_scores = calcPseudoR2(frequencies, magnitudes, params_no, allSOWsperformance, dta, structure, design)
    all_pseudo_r_scores = pd.read_csv("../Simulation_outputs/" + design + "/" + structure + "_pseudo_r_scores.csv")
    
    for i in range(len(percentiles)):
        for j in range(3):
            # magnitude of shortage at this percentile to plot
            h = np.where(np.array(historic_percents) == 100 - percentiles[i])[0][0]
            if j == 0:
                h -= 2
            elif j == 2:
                h += 2
            # find out if each realization was a success or failure at this magnitude/frequency combination
            dta['Success'] = allSOWsperformance[list(frequencies).index(100-percentiles[i]),h,:]
            # consider each SOW a success if 50% or more realizations were a success
            avg_dta = dta.groupby(['mu0','mu1','sigma0','sigma1','p00','p11'],as_index=False)[['Success']].mean()
            avg_dta.loc[np.where(avg_dta['Success']>=0.5)[0],'Success'] = 1
            avg_dta.loc[np.where(avg_dta['Success']<0.5)[0],'Success'] = 0
            # load pseudo R2 of predictors for this magnitude/frequency combination
            pseudo_r_scores = all_pseudo_r_scores[str((100-percentiles[i]))+'yrs_'+str(magnitudes[h])+'prc'].values
            if pseudo_r_scores.any():
                top_predictors = np.argsort(pseudo_r_scores)[::-1][:2]
                ranges = param_bounds[top_predictors]
                # define grid of x (1st predictor), and y (2nd predictor) dimensions
                # to plot contour map over
                xgrid = np.arange(param_bounds[top_predictors[0]][0], 
                                  param_bounds[top_predictors[0]][1], np.around((ranges[0][1]-ranges[0][0])/100,decimals=4))
                ygrid = np.arange(param_bounds[top_predictors[1]][0], 
                                  param_bounds[top_predictors[1]][1], np.around((ranges[1][1]-ranges[1][0])/100,decimals=4))
                all_predictors = [dta.columns.tolist()[k] for k in top_predictors]
                # fit logistic regression model with top two predictors of success and their interaction
                avg_dta['Interaction'] = avg_dta[all_predictors[0]] * avg_dta[all_predictors[1]]
                result = fitLogit_interact(avg_dta, all_predictors[:2])
                
                # plot success/failure for each SOW on top of logistic regression estimate of probability of success
                contourset = plotFactorMap(axes[i,j+1], result, avg_dta, probability_cmap, success_cmap, contour_levels, xgrid, ygrid, \
                              all_predictors[0], all_predictors[1])
                axes[i,j+1].set_title("Success if " + str(magnitudes[h]) + "% shortage\n<" + str((100-percentiles[i])) + "% of the time", fontsize=16)
                fig.savefig('FigureS18_FactorMaps_User3.pdf')
                
    cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
    cbar = fig.colorbar(contourset, cax=cbar_ax)
    cbar.ax.set_ylabel("Predicted Probability of Success", rotation=-90, va="bottom",fontsize=16)
    cbar.ax.tick_params(labelsize=16)
    fig.savefig("FigureS18_FactorMaps_User3.pdf")
    fig.clf()
    
    return None
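fitLogit_interact is the logistic-regression analogue of the OLS helper in Example #3; a plausible statsmodels sketch, assuming the 'Success' and 'Interaction' columns built in the loop above:

import statsmodels.api as sm

def fitLogit_interact(dta, predictors):
    # logistic regression of success/failure on the two predictors,
    # their interaction, and an intercept
    X = sm.add_constant(dta[predictors + ['Interaction']])
    return sm.Logit(dta['Success'], X).fit()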
Example #5
def makeFigureS11_VarianceDecomposition_User3():

    sns.set_style("white")

    designs = [
        'LHsamples_original_1000_AnnQonly', 'CMIPunscaled_SOWs', 'Paleo_SOWs',
        'LHsamples_wider_1000_AnnQonly'
    ]
    nsamples = [1000, 97, 366, 1000]  # before removing those out of bounds
    titles = ['Box Around Historical', 'CMIP', 'Paleo', 'All-Encompassing']
    structure = '3704614'
    nrealizations = 10
    short_idx = np.arange(2, 22, 2)
    demand_idx = np.arange(1, 21, 2)

    colors = [
        "#de2d26", "#fb6a4a", "#3182bd", "#6baed6", "#a50f15", "#08519c",
        "#9e9ac8"
    ]
    mu0 = plt.Rectangle((0, 0), 1, 1, fc=colors[0], edgecolor='none')
    sigma0 = plt.Rectangle((0, 0), 1, 1, fc=colors[1], edgecolor='none')
    mu1 = plt.Rectangle((0, 0), 1, 1, fc=colors[2], edgecolor='none')
    sigma1 = plt.Rectangle((0, 0), 1, 1, fc=colors[3], edgecolor='none')
    p00 = plt.Rectangle((0, 0), 1, 1, fc=colors[4], edgecolor='none')
    p11 = plt.Rectangle((0, 0), 1, 1, fc=colors[5], edgecolor='none')
    Interact = plt.Rectangle((0, 0), 1, 1, fc=colors[6], edgecolor='none')

    # perform variance decomposition
    #for i, design in enumerate(designs):
    #    Sobol_per_structure(design, structure)

    # plot shortage distributions
    fig = plt.figure()
    count = 1  # subplot counter

    # load historical shortage and demand data and convert acre-ft to m^3
    hist_short = np.loadtxt('../Simulation_outputs/' + structure +
                            '_info_hist.txt')[:, 2] * 1233.48 / 1E6
    hist_demand = np.loadtxt('../Simulation_outputs/' + structure +
                             '_info_hist.txt')[:, 1] * 1233.48 / 1E6
    # replace failed runs with np.nan (currently -999.9)
    hist_short[hist_short < 0] = np.nan
    for i, design in enumerate(designs):
        # find which samples are still in param_bounds after flipping misidentified wet and dry states
        param_bounds, param_names, params_no, problem = setupProblem(design)
        _, rows_to_keep = getSamples(design, params_no, param_bounds)
        # after removing those out of bounds after reclassification
        nsamples[i] = len(rows_to_keep)

        # load shortage data for this experimental design
        SYN = np.load('../Simulation_outputs/' + design + '/' + structure +
                      '_info.npy')
        # extract columns for year shortage and demand and convert acre-ft to m^3
        SYN_short = SYN[:, short_idx, :] * 1233.48 / 1E6
        SYN_demand = SYN[:, demand_idx, :] * 1233.48 / 1E6
        # use just the samples within the experimental design
        SYN_short = SYN_short[:, :, rows_to_keep]
        SYN_demand = SYN_demand[:, :, rows_to_keep]
        # reshape into 12*nyears x nsamples*nrealizations
        SYN_short = SYN_short.reshape([
            np.shape(SYN_short)[0],
            np.shape(SYN_short)[1] * np.shape(SYN_short)[2]
        ])
        SYN_demand = SYN_demand.reshape([
            np.shape(SYN_demand)[0],
            np.shape(SYN_demand)[1] * np.shape(SYN_demand)[2]
        ])
        # replace failed runs with np.nan (currently -999.9)
        SYN_short[SYN_short < 0] = np.nan

        # plot shortage distribution
        ax = fig.add_subplot(2, 4, count)
        handles, labels = plotSDC(ax, SYN_short, SYN_demand, hist_short,
                                  hist_demand, nsamples[i], nrealizations)

        # only put labels in left column, make y ranges consistent, title experiment
        if count == 1:
            ax.tick_params(axis='y', labelsize=14)
            ax.set_ylabel('Annual Shortage\n(millions of m' + r'$^3$' + ')',
                          fontsize=16)
        else:
            ax.tick_params(axis='y', labelleft=False)

        ax.set_title(titles[count - 1], fontsize=16)
        ax.tick_params(axis='x', labelbottom=False)

        # iterate subplot counter
        count += 1

    # plot variance decomposition
    for design in designs:
        # load sensitivity indices
        S1_values = pd.read_csv('../Simulation_outputs/' + design + '/' +
                                structure + '_S1.csv')

        # plot variance decomposition
        ax = fig.add_subplot(2, 4, count)
        plotSums(S1_values, ax, colors)

        ax.tick_params(axis='x', labelsize=14)

        if count == 5:
            ax.tick_params(axis='y', labelsize=14)
            ax.set_ylabel('Portion of\nVariance Explained', fontsize=16)
        else:
            ax.tick_params(axis='y', labelleft=False)

        # iterate subplot counter
        count += 1

    fig.set_size_inches([16, 8])
    fig.subplots_adjust(bottom=0.22)
    fig.text(0.5, 0.15, 'Percentile of Shortage', ha='center', fontsize=16)
    fig.savefig('FigureS11_VarianceDecomposition_User3.pdf')
    fig.clf()

    return None
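The reshape used in these figure functions collapses the last two axes (samples and realizations) into one, so each column of the resulting 2-D matrix is a single simulated trace. A tiny self-contained check of the mechanics:

import numpy as np

# toy array shaped (timesteps, samples, realizations)
SYN = np.arange(2 * 3 * 4, dtype=float).reshape(2, 3, 4)
flat = SYN.reshape(SYN.shape[0], SYN.shape[1] * SYN.shape[2])
assert flat.shape == (2, 12)
# C (row-major) order: column k holds sample k // 4, realization k % 4
assert flat[0, 5] == SYN[0, 1, 1]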
Example #6
def makeFigure6_ShortageDistns():

    sns.set_style("white")

    designs = [
        'LHsamples_original_1000_AnnQonly', 'CMIPunscaled_SOWs', 'Paleo_SOWs',
        'LHsamples_wider_1000_AnnQonly'
    ]
    nsamples = [1000, 97, 366, 1000]  # before removing those out of bounds
    titles = ['Box Around Historical', 'CMIP', 'Paleo', 'All-Encompassing']
    structures = ['53_ADC022', '7200645']
    nrealizations = 10
    short_idx = np.arange(2, 22, 2)
    demand_idx = np.arange(1, 21, 2)

    fig = plt.figure()
    count = 1  # subplot counter
    for structure in structures:
        # load historical shortage and demand data and convert acre-ft to m^3
        hist_short = np.loadtxt('../Simulation_outputs/' + structure +
                                '_info_hist.txt')[:, 2] * 1233.48 / 1E6
        hist_demand = np.loadtxt('../Simulation_outputs/' + structure +
                                 '_info_hist.txt')[:, 1] * 1233.48 / 1E6
        # replace failed runs with np.nan (currently -999.9)
        hist_short[hist_short < 0] = np.nan
        for i, design in enumerate(designs):
            # find which samples are still in param_bounds after flipping misidentified wet and dry states
            param_bounds, param_names, params_no, problem = setupProblem(design)
            _, rows_to_keep = getSamples(design, params_no, param_bounds)
            # after removing those out of bounds after reclassification
            nsamples[i] = len(rows_to_keep)

            # load shortage data for this experimental design
            SYN = np.load('../Simulation_outputs/' + design + '/' + structure +
                          '_info.npy')
            # extract columns for year shortage and demand and convert acre-ft to m^3
            SYN_short = SYN[:, short_idx, :] * 1233.48 / 1E6
            SYN_demand = SYN[:, demand_idx, :] * 1233.48 / 1E6
            # use just the samples within the experimental design
            SYN_short = SYN_short[:, :, rows_to_keep]
            SYN_demand = SYN_demand[:, :, rows_to_keep]
            # reshape into 12*nyears x nsamples*nrealizations
            SYN_short = SYN_short.reshape([
                np.shape(SYN_short)[0],
                np.shape(SYN_short)[1] * np.shape(SYN_short)[2]
            ])
            SYN_demand = SYN_demand.reshape([
                np.shape(SYN_demand)[0],
                np.shape(SYN_demand)[1] * np.shape(SYN_demand)[2]
            ])
            # replace failed runs with np.nan (currently -999.9)
            SYN_short[SYN_short < 0] = np.nan

            # plot shortage distribution
            ax = fig.add_subplot(2, 4, count)
            handles, labels = plotSDC(ax, SYN_short, SYN_demand, hist_short,
                                      hist_demand, nsamples[i], nrealizations)

            # only put labels on bottom row/left column, make y ranges consistent, title experiment
            if count == 1 or count == 5:
                ax.tick_params(axis='y', labelsize=14)
            else:
                ax.tick_params(axis='y', labelleft=False)

            if count <= 4:
                ax.tick_params(axis='x', labelbottom=False)
                ax.set_title(titles[count - 1], fontsize=16)
                ax.set_ylim(0, 6.2)
                #ax.ticklabel_format(style='sci', axis='y', scilimits=(6,6))
            else:
                ax.tick_params(axis='x', labelsize=14)
                ax.set_ylim(0, 370)
                #ax.ticklabel_format(style='sci', axis='y', scilimits=(8,8))

            # iterate subplot counter
            count += 1

    fig.set_size_inches([16, 8])
    fig.text(0.5, 0.15, 'Percentile', ha='center', fontsize=16)
    fig.text(0.05,
             0.5,
             'Annual Shortage (millions of m' + r'$^3$' + ')',
             va='center',
             rotation=90,
             fontsize=16)
    fig.subplots_adjust(bottom=0.22)
    labels_transposed = [
        labels[9], labels[4], labels[8], labels[3], labels[7], labels[2],
        labels[6], labels[1], labels[5], labels[0]
    ]
    handles_transposed = [
        handles[9], handles[4], handles[8], handles[3], handles[7], handles[2],
        handles[6], handles[1], handles[5], handles[0]
    ]
    legend = fig.legend(handles=handles_transposed,
                        labels=labels_transposed,
                        fontsize=16,
                        loc='lower center',
                        title='Cumulative frequency in experiment',
                        ncol=5)
    plt.setp(legend.get_title(), fontsize=16)
    fig.savefig('Figure6_ShortageDistns.pdf')
    fig.clf()

    return None
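The hand-written legend transposition above reorders entries built column-by-column into the row-major order that ncol=5 lays out; an equivalent form that makes the reordering explicit:

order = [9, 4, 8, 3, 7, 2, 6, 1, 5, 0]
handles_transposed = [handles[k] for k in order]
labels_transposed = [labels[k] for k in order]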