def AEP(self, stats, windspeeds):
    '''
    Get AEPs for simulation cases

    The mean generator power ('GenPwr', 'mean') is averaged over all cases that
    share a wind speed, weighted by the values returned from
    self.prob_WindDist(..., disttype='pdf'), integrated over the sorted unique
    wind speeds with the trapezoidal rule and multiplied by 8760.

    TODO: Print/Save this someplace besides the console

    Parameters:
    ----------
    stats: dict, list, pd.DataFrame
        Dict (single case), list(multiple cases), df(single or multiple cases)
        containing summary statistics.
    windspeeds: list-like
        List of wind speed values corresponding to each power output in the
        stats input for a single dataset

    Returns:
    --------
    AEP: np.ndarray
        Annual energy production, one entry per power column that was selected
        (a single entry when 'GenPwr' is a first-level column label, otherwise
        one entry per column matching (*, 'GenPwr', 'mean')).

    Raises:
    -------
    TypeError
        If stats is not a dict, list or pd.DataFrame.
    ValueError
        If windspeeds has an incompatible length or GenPwr is not available.
    '''
    # Make sure stats is in pandas df
    if isinstance(stats, (dict, list)):
        stats_df = pdTools.dict2df(stats)
    elif isinstance(stats, pd.DataFrame):
        stats_df = stats
    else:
        raise TypeError(
            'Input stats is must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.')

    # Check windspeed length
    n_cases = len(stats_df)
    if len(windspeeds) == n_cases:
        ws = windspeeds
    elif int(len(windspeeds) / len(stats_df.columns.levels[0])) == n_cases:
        ws = windspeeds[0:n_cases]
        print('WARNING: Assuming the input windspeed array is duplicated for each dataset.')
    else:
        raise ValueError(
            'Length of windspeeds is not the correct length for the input statistics.')

    # load power array (always as a DataFrame so it can be grouped below)
    column_levels = stats_df.columns.levels
    if 'GenPwr' in column_levels[0]:
        pwr_array = stats_df.loc[:, ('GenPwr', 'mean')].to_frame()
    elif 'GenPwr' in column_levels[1]:
        pwr_array = stats_df.loc[:, (slice(None), 'GenPwr', 'mean')]
    else:
        raise ValueError("('GenPwr','Mean') does not exist in the input statistics.")

    # group and average powers by wind speeds; grouping by an array avoids
    # writing a helper column into a slice of the caller's dataframe
    pwr_array = pwr_array.groupby(np.asarray(ws)).mean()

    # sorted unique wind speeds: the grouped rows above are sorted by wind speed,
    # so the integration axis has to be in the same (increasing) order
    ws_set = sorted(set(ws))

    # wind probability
    wind_prob = self.prob_WindDist(ws_set, disttype='pdf')

    # Calculate AEP. np.trapz was renamed np.trapezoid in NumPy 2.0, so use
    # whichever one this NumPy provides. 8760 = 24 * 365.
    integrate = getattr(np, 'trapezoid', None) or np.trapz
    aep = integrate(pwr_array.T * wind_prob, ws_set) * 8760

    return aep
def plot_load_ranking(self, load_rankings, case_matrix, classifier_type,
                      classifier_names=[], n_rankings=10, caseidx_labels=False):
    '''
    Plot the highest ranked cases of every ranked channel as bar charts.

    For each channel, the rankings of all datasets are combined, sorted in
    descending order, and the first n_rankings entries are drawn as bars that
    are colored and labeled by the classifier found in case_matrix.

    case_matrix has to have wind speeds in it if you want to plot wrt wind speeds

    TODO: Save figs

    Parameters:
    -------
    load_rankings: dict, list, pd.DataFrame
        Load rankings; dicts and lists are converted with pdTools.dict2df.
    case_matrix: dict-like or pd.DataFrame
        Indexed as case_matrix[classifier_type][case_index]; wind speeds are
        read from case_matrix[('InflowWind', 'WindSpeed')] when available.
    classifier_type: hashable
        Key of case_matrix holding the value that distinguishes the datasets.
    classifier_names: list of strings, optional
        Display name for each unique classifier value. Generic names are used
        when empty.
    n_rankings: int, optional
        Maximum number of ranked cases to show per channel.
    caseidx_labels: bool, optional
        Label bars with the case index instead of the wind speed.

    Returns:
    --------
    fig_list: list
        figure handles, one per channel
    ax_list: list
        axes handles, one per channel
    '''
    # Check for valid inputs
    if isinstance(load_rankings, (dict, list)):
        load_ranking_df = pdTools.dict2df(load_rankings)
    elif isinstance(load_rankings, pd.DataFrame):
        load_ranking_df = load_rankings
    else:
        raise TypeError(
            'Input stats must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.')

    # Check multiindex size: the code below expects three column levels with the
    # channel on level 1, so a two-level frame gets a leading dataset level
    if load_ranking_df.columns.nlevels == 2:
        load_ranking_df = pd.concat([load_ranking_df], keys=['dataset_0'], axis=1)

    # Check for classifier_names
    classifiers = list(set(case_matrix[classifier_type]))
    if not classifier_names:
        classifier_names = ['datatset_{}'.format(idx) for idx in range(len(classifiers))]

    # Check for wind speeds in case_matrix
    if not caseidx_labels:
        try:
            windspeeds = case_matrix[('InflowWind', 'WindSpeed')]
        except (KeyError, IndexError, TypeError):
            print('Unable to find wind speeds in case_matrix, plotting w.r.t case index')
            caseidx_labels = True

    # Define a color map
    clrs = np.array([[127, 60, 141],
                     [17, 165, 121],
                     [57, 105, 172],
                     [242, 183, 1],
                     [231, 63, 116],
                     [128, 186, 90],
                     [230, 131, 16],
                     [256, 256, 256]]) / 256.

    # Get channel names
    channels = load_ranking_df.columns.levels[1]

    fig_list = []
    ax_list = []

    # --- Generate plots ---
    for channel in channels:
        # Pull out specific channel
        cdf = load_ranking_df.loc[:, (slice(None), channel, slice(None))].droplevel(1, axis=1)
        # put the load ranking from each dataset in a list so we can combine them
        cdf_list = [cdf[dataset] for dataset in cdf.columns.levels[0]]
        chan_df = pd.concat(cdf_list)                # combine all load rankings
        chan_stats = chan_df.columns.values          # pull out the names of the columns
        chan_df = chan_df.sort_values(by=chan_stats[0], ascending=False)  # sort
        chan_df = chan_df.reset_index(drop=True)     # re-index

        # never ask for more bars than there are ranked cases
        n_bars = min(n_rankings, len(chan_df))
        colors = np.zeros((n_bars, 3))
        labels = [''] * n_bars

        # find colors and labels for plots
        for i in range(n_bars):
            case_idx = chan_df[chan_stats[1]][i]
            classifier = case_matrix[classifier_type][case_idx]
            class_idx = classifiers.index(classifier)
            # classifiers beyond the palette share its last color
            colors[i, :] = clrs[min(len(clrs) - 1, class_idx)]

            if not caseidx_labels:
                ws = windspeeds[case_idx]
                labels[i] = classifier_names[class_idx] + ' - ' + str(ws) + ' m/s'
            else:
                labels[i] = classifier_names[class_idx] + ' - Case ' + str(case_idx)

        # make plot
        fig, ax = plt.subplots()
        chan_df[chan_stats[0]][0:n_bars].plot.bar(color=colors, ax=ax)
        ax.set_ylabel(channel)
        ax.set_xticklabels(labels, rotation=45, ha='right')
        plt.draw()
        fig_list.append(fig)
        ax_list.append(ax)

    return fig_list, ax_list
def stat_curve(self, windspeeds, stats, plotvar, plottype, stat_idx=0, names=[]):
    '''
    Plot the turbulent power curve for a set of data.
    Can be plotted as bar (good for comparing multiple cases) or line

    Parameters:
    -------
    windspeeds: list-like
        List of wind speeds to plot
    stats: list, dict, or pd.DataFrame
        Dict (single case), list(multiple cases), df(single or multiple cases)
        containing summary statistics.
    plotvar: str
        Type of variable to plot
    plottype: str
        bar or line
    stat_idx: int, optional
        Index of datasets in stats to plot from (used by the line plot)
    names: list of strings, optional
        Legend entries for the bar plot; names[0] is used in the title of the
        line plot. The dataset labels of the statistics are used when empty.

    Returns:
    --------
    fig: figure handle
    ax: axes handle

    Raises:
    -------
    TypeError
        If stats is not a dict, list or pd.DataFrame.
    ValueError
        If windspeeds has an incompatible length, plotvar is not available, or
        plottype is neither 'bar' nor 'line'.
    '''
    # Check for valid inputs
    if isinstance(stats, dict):
        stats_df = pdTools.dict2df(stats)
        if isinstance(stat_idx, list) or stat_idx > 0:
            print('WARNING: stat_ind = {} is invalid for a single stats dictionary. Defaulting to stat_inds=0.'.format(stat_idx))
            stat_idx = 0
    elif isinstance(stats, list):
        stats_df = pdTools.dict2df(stats)
    elif isinstance(stats, pd.DataFrame):
        stats_df = stats
    else:
        raise TypeError(
            'Input stats must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.')

    # Check windspeed length
    if len(windspeeds) == len(stats_df):
        ws = windspeeds
    elif int(len(windspeeds) / len(stats_df.columns.levels[0])) == len(stats_df):
        ws = windspeeds[0:len(stats_df)]
    else:
        raise ValueError('Length of windspeeds is not the correct length for the input statistics')

    # Get statistical data for desired plot variable
    if plotvar in stats_df.columns.levels[0]:
        sdf = stats_df.loc[:, (plotvar, slice(None))].droplevel([0], axis=1)
        if sdf.columns.nlevels == 1:
            # single dataset: add a dataset level so that the (dataset, stat)
            # selections below work the same way as for multiple datasets
            sdf = pd.concat([sdf], keys=['dataset_0'], axis=1)
    elif plotvar in stats_df.columns.levels[1]:
        sdf = stats_df.loc[:, (slice(None), plotvar, slice(None))].droplevel([1], axis=1)
    else:
        raise ValueError("{} does not exist in the input statistics.".format(plotvar))

    # Group by windspeed and average each statistic (for multiple seeds)
    sdf = sdf.groupby(np.asarray(ws)).mean().rename_axis('WindSpeeds')
    # Final wind speed values
    pl_windspeeds = sdf.index.values

    if plottype == 'bar':
        # Define mean and std dataframes
        means = sdf.loc[:, (slice(None), 'mean')].droplevel(1, axis=1)
        std = sdf.loc[:, (slice(None), 'std')].droplevel(1, axis=1)
        # Plot bar charts
        fig, ax = plt.subplots()
        means.plot.bar(yerr=std, ax=ax, title=plotvar, capsize=2)
        if names:
            ax.legend(names, loc='upper left')
        else:
            # keep the automatically generated dataset labels
            ax.legend(loc='upper left')

    elif plottype == 'line':
        # Define mean, min, max, and std dataframes
        dataset = sdf.columns.levels[0][stat_idx]
        means = sdf.loc[:, (dataset, 'mean')]
        smax = sdf.loc[:, (dataset, 'max')]
        smin = sdf.loc[:, (dataset, 'min')]
        std = sdf.loc[:, (dataset, 'std')]

        title_name = names[0] if names else str(dataset)

        fig, ax = plt.subplots()
        # thin gray bars: min/max envelope, thick bars: standard deviation
        ax.errorbar(pl_windspeeds, means, [means - smin, smax - means],
                    fmt='k', ecolor='gray', lw=1, capsize=2)
        means.plot(yerr=std, ax=ax,
                   capsize=2, lw=3,
                   elinewidth=2,
                   title=title_name + ' - ' + plotvar)
        ax.grid(lw=0.5, linestyle='--')

    else:
        raise ValueError("plottype must be 'bar' or 'line', not {}.".format(plottype))

    return fig, ax
def load_ranking(self, stats, names=[], get_df=False):
    '''
    Find load rankings for desired signals

    The variables and statistics to rank are read from self.ranking_vars and
    self.ranking_stats, which are paired element by element, e.g.
    ranking_vars = [['GenTq'], ['RootMyb1', 'RootMyb2', 'RootMyb3']] and
    ranking_stats = ['max', 'std']. When a group holds several variables they
    are reduced to one value per case and dataset, and the group is labeled
    with the first (sorted) variable name without its last character.

    Parameters:
    -------
    stats: dict, list, pd.DataFrame
        summary statistic information
    names: list of strings, optional
        names corresponding to each dataset
    get_df: bool, optional
        Return pd.DataFrame of data?

    Returns:
    -------
    load_ranking: dict
        dictionary containing load rankings
    load_ranking_df: pd.DataFrame
        pandas DataFrame containing load rankings (only if get_df is True)

    Raises:
    -------
    TypeError
        If stats is not a dict, list or pd.DataFrame.
    ValueError
        If a ranking statistic is not one of max, abs, min, mean, std, or if
        none of the requested signals exist in stats.
    '''
    # Make sure stats is in pandas df
    if isinstance(stats, dict):
        stats_df = pdTools.dict2df([stats], names=names)
    elif isinstance(stats, list):
        stats_df = pdTools.dict2df(stats, names=names)
    elif isinstance(stats, pd.DataFrame):
        stats_df = stats
    else:
        raise TypeError(
            'Input stats is must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.')

    # Ensure naming consitency
    if not names:
        names = list(stats_df.columns.levels[0])

    if self.verbose:
        print('Calculating load rankings.')

    available_columns = stats_df.columns

    # Collect load rankings
    collected_rankings = []
    for var, stat in zip(self.ranking_vars, self.ranking_stats):
        # a bare string would otherwise be split into single characters
        var_list = [var] if isinstance(var, str) else list(var)

        # Column names to search in stats_df
        #  - [name, variable, stat], i.e.['DLC1.1','TwrBsFxt','max']
        col = pd.MultiIndex.from_product([names, var_list, [stat]])

        # Labels for the ranking columns of this group
        mi_idx = stat + '_case_idx'
        if len(col.levels[1]) > 1:
            mi_var = [col.levels[1][0][:-1]]
        else:
            mi_var = list(col.levels[1])

        # Check for valid stats
        missing = [c for c in col if c not in available_columns]
        for c in missing:
            print('WARNING: {} does not exist in statistics.'.format(c))
        if missing:
            col = col.drop(missing)

        # Go to next case if no [stat, var] exists in this set
        if len(col) == 0:
            continue

        # Reduce the selected variables to one column per dataset. Grouping the
        # transposed frame replaces DataFrame.max/min/mean(axis=1, level=0),
        # whose `level` argument is no longer available in pandas >= 2.0.
        by_dataset = stats_df[col].T.groupby(level=0)
        if stat in ('max', 'abs'):
            var_df = by_dataset.max().T
            rank_ascending = False
        elif stat == 'min':
            var_df = by_dataset.min().T
            rank_ascending = True
        elif stat in ('mean', 'std'):
            var_df = by_dataset.mean().T
            rank_ascending = False
        else:
            raise ValueError(
                '{} is not a supported ranking statistic.'.format(stat))

        # Combine ranking dataframes for each dataset; reset_index() puts the
        # case index in front of the ranked value
        var_df_list = [
            var_df[column].sort_values(ascending=rank_ascending).reset_index()
            for column in var_df.columns
        ]
        single_lr = pd.concat(var_df_list, axis=1)
        # label with the datasets that are actually present, in their actual order
        single_lr.columns = pd.MultiIndex.from_product(
            [list(var_df.columns), mi_var, [mi_idx, stat]])
        collected_rankings.append(single_lr)

    if not collected_rankings:
        raise ValueError('None of the requested ranking signals exist in statistics.')

    # Combine dataframes for each case
    load_ranking_df = pd.concat(collected_rankings, axis=1).sort_index(axis=1)
    # Generate dict of info
    load_ranking = pdTools.df2dict(load_ranking_df)

    if get_df:
        return load_ranking, load_ranking_df
    else:
        return load_ranking
def design_comparison(self, filenames):
    '''
    Compare design runs

    Parameters:
    ----------
    filenames: list
        list of lists, where the inner lists are of equal length.

    Returns:
    --------
    stats: dict
        dictionary of summary statistics data (a list with one entry per
        dataset when the analysis is run in serial)
    load_rankings: dict
        dictionary of load rankings (a list with one entry per dataset when
        the analysis is run in serial)

    Raises:
    -------
    ValueError
        If filenames is empty or the datasets differ in length.
    '''
    if len(filenames) == 0:
        raise ValueError('No datasets were provided for the design comparison.')

    # Make sure datasets are the same length
    ds_len = len(filenames[0])
    if any(len(dataset) != ds_len for dataset in filenames):
        raise ValueError(
            'The datasets for filenames corresponding to the design comparison should all be the same size.'
        )

    # transpose so that every entry holds the same case from each dataset
    fnames = np.array(filenames).T.tolist()

    # Setup FAST_Analysis preferences
    loads_analysis = Analysis.Loads_Analysis()
    loads_analysis.verbose = self.verbose
    loads_analysis.t0 = self.t0
    loads_analysis.tf = self.tf
    loads_analysis.ranking_vars = self.ranking_vars
    loads_analysis.ranking_stats = self.ranking_stats
    loads_analysis.DEL_info = self.DEL_info

    if self.parallel_analysis:  # run analysis in parallel
        pool = mp.Pool(self.parallel_cores)
        try:
            stats_separate = pool.map(
                partial(loads_analysis.full_loads_analysis,
                        get_load_ranking=False), fnames)
        finally:
            # release the workers even when a case fails
            pool.close()
            pool.join()

        # Re-sort into the more "standard" dictionary/dataframe format we like
        stats = [pdTools.dict2df(ss).unstack() for ss in stats_separate]
        dft = pd.DataFrame(stats)
        dft = dft.reorder_levels([2, 0, 1], axis=1).sort_index(axis=1, level=0)
        stats = pdTools.df2dict(dft)

        # Get load rankings after stats are loaded
        load_rankings = loads_analysis.load_ranking(
            stats, names=self.dataset_names)

    else:  # run analysis in serial
        stats = []
        load_rankings = []
        for file_sets in filenames:
            st, lr = loads_analysis.full_loads_analysis(
                file_sets, get_load_ranking=True, names=self.dataset_names)
            stats.append(st)
            load_rankings.append(lr)

    return stats, load_rankings
def batch_processing(self):
    '''
    Run a full batch processing case!

    Files listed in self.OpenFAST_outfile_list that do not exist are reported
    and removed from the list (in place), a file name base is chosen for every
    dataset (stored in self.namebase), the loads analysis is run (as a design
    comparison when several datasets are given), and the results are saved as
    yaml files when self.save_SummaryStats / self.save_LoadRanking are set.

    Returns:
    --------
    stats: dict or list
        summary statistics as returned by the analysis
    load_rankings: dict or list
        load rankings as returned by the analysis

    Raises:
    -------
    ValueError
        If no output files are defined, or none of a dataset's files exist.
    '''
    # ------------------ Input consistancy checks ------------------ #
    # Do we have a list of data?
    N = len(self.OpenFAST_outfile_list)
    if N == 0:
        raise ValueError(
            'Output files not defined! Populate: "FastPost.OpenFAST_outfile_list". \n Quitting FAST_Processing.'
        )

    # Do all the files exist? Missing files are dropped. The lists are rebuilt
    # instead of calling .remove() while iterating, which would skip the entry
    # that follows every removed one.
    kept = []
    for i, flist in enumerate(self.OpenFAST_outfile_list):
        if isinstance(flist, str):
            if os.path.exists(flist):
                kept.append(flist)
            else:
                print('Warning! File "{}" does not exist.'.format(flist))
        elif isinstance(flist, list):
            existing = []
            for fname in flist:
                if os.path.exists(fname):
                    existing.append(fname)
                elif len(self.dataset_names) > 0:
                    print('Warning! File "{}" from {} does not exist.'.format(
                        fname, self.dataset_names[i]))
                else:
                    print('Warning! File "{}" from dataset {} of {} does not exist.'.format(
                        fname, i + 1, N))
            flist[:] = existing
            kept.append(flist)
        else:
            kept.append(flist)
    self.OpenFAST_outfile_list[:] = kept

    if len(self.OpenFAST_outfile_list) == 0 or any(
            isinstance(flist, list) and len(flist) == 0
            for flist in self.OpenFAST_outfile_list):
        raise ValueError(
            'No existing output files left in "FastPost.OpenFAST_outfile_list". \n Quitting FAST_Processing.'
        )

    # get unique file namebase for datasets
    self.namebase = []
    if len(self.dataset_names) > 0:
        # use filename safe version of dataset names
        self.namebase = [
            "".join([
                c for c in name
                if c.isalpha() or c.isdigit() or c in ['_', '-']
            ]).rstrip() for _, name in zip(range(N), self.dataset_names)
        ]
    elif len(self.OpenFAST_outfile_list) > 0:
        # use out file naming: file name without its last '_'-separated part
        if isinstance(self.OpenFAST_outfile_list[0], list):
            first_files = [flist[0] for flist in self.OpenFAST_outfile_list]
        else:
            first_files = self.OpenFAST_outfile_list
        self.namebase = [
            '_'.join(os.path.split(fname)[1].split('_')[:-1])
            for fname in first_files
        ]

    # check that names are unique
    if len(self.namebase) != len(set(self.namebase)):
        self.namebase = []

    # as last resort, give generic name
    if not self.namebase:
        if isinstance(self.OpenFAST_outfile_list[0], str):
            # Just one dataset name for single dataset
            self.namebase = ['dataset1']
        else:
            self.namebase = [
                'dataset' + ('{}'.format(i)).zfill(len(str(N - 1)))
                for i in range(N)
            ]

    # Run design comparison if filenames list has multiple lists
    if (len(self.OpenFAST_outfile_list) > 1) and isinstance(
            self.OpenFAST_outfile_list[0], list):
        # Load stats and load rankings for design comparisons
        stats, load_rankings = self.design_comparison(
            self.OpenFAST_outfile_list)

    else:
        # Initialize Analysis
        loads_analysis = Analysis.Loads_Analysis()
        loads_analysis.verbose = self.verbose
        loads_analysis.t0 = self.t0
        loads_analysis.tf = self.tf
        loads_analysis.DEL_info = self.DEL_info
        loads_analysis.ranking_stats = self.ranking_stats
        loads_analysis.ranking_vars = self.ranking_vars

        if self.parallel_analysis:
            # run analysis in parallel
            run_case = partial(loads_analysis.full_loads_analysis,
                               get_load_ranking=False)
            pool = mp.Pool(self.parallel_cores)
            try:
                try:
                    stats_separate = pool.map(
                        run_case, self.OpenFAST_outfile_list)
                except Exception:
                    # best-effort fallback kept from the original code: retry
                    # with the first entry of the list as the list of files
                    stats_separate = pool.map(
                        run_case, self.OpenFAST_outfile_list[0])
            finally:
                # release the workers even when both attempts fail
                pool.close()
                pool.join()

            # Re-sort into the more "standard" dictionary/dataframe format we like
            stats = [pdTools.dict2df(ss).unstack() for ss in stats_separate]
            dft = pd.DataFrame(stats)
            dft = dft.reorder_levels([2, 0, 1], axis=1).sort_index(axis=1, level=0)
            stats = pdTools.df2dict(dft)

            # Get load rankings after stats are loaded
            load_rankings = loads_analysis.load_ranking(
                stats, names=self.dataset_names, get_df=False)

        else:
            # run analysis in serial
            stats, load_rankings = loads_analysis.full_loads_analysis(
                self.OpenFAST_outfile_list, get_load_ranking=True)

    def save_results(results, suffix):
        # one file for a single dict, otherwise one file per dataset
        if isinstance(results, dict):
            named_results = [(self.namebase[0], results)]
        else:
            named_results = zip(self.namebase, results)
        for namebase, result in named_results:
            fname = namebase + suffix
            if self.verbose:
                print('Saving {}'.format(fname))
            save_yaml(self.results_dir, fname, result)

    if self.save_SummaryStats:
        save_results(stats, '_stats.yaml')

    if self.save_LoadRanking:
        save_results(load_rankings, '_LoadRanking.yaml')

    return stats, load_rankings
def AEP(self, stats, windspeeds, U_pwr_curve=[], pwr_curve_vars=[]):
    '''
    Get AEPs for simulation cases

    The mean generator power ('GenPwr', 'mean') is averaged over all cases that
    share a wind speed, weighted by the values returned from
    self.prob_WindDist(..., disttype='pdf'), integrated over the sorted unique
    wind speeds with the trapezoidal rule and multiplied by 8760.

    TODO: Print/Save this someplace besides the console

    Parameters:
    ----------
    stats: dict, list, pd.DataFrame
        Dict (single case), list(multiple cases), df(single or multiple cases)
        containing summary statistics.
    windspeeds: list-like
        List of wind speed values corresponding to each power output in the
        stats input for a single dataset
    U_pwr_curve: list-like, optional
        List of wind speed values to output power variables at. When empty, the
        sorted unique values of windspeeds are used.
    pwr_curve_vars: list of strings, optional
        List of OpenFAST output channels to return the mean value as a function
        of wind speeds

    Returns:
    --------
    AEP: np.ndarray
        Annual energy production, one entry per selected power column.
    performance_curves: dict
        Only returned when pwr_curve_vars is not empty. Holds the wind speeds
        under 'U' and one entry per requested channel. With U_pwr_curve given,
        an entry is an array of PCHIP-interpolated values (2-D with one column
        per dataset if several datasets are present); otherwise it is the
        wind-speed-averaged data.

    Raises:
    -------
    TypeError
        If stats is not a dict, list or pd.DataFrame.
    ValueError
        If windspeeds has an incompatible length or a requested channel is not
        available.
    '''
    # Make sure stats is in pandas df
    if isinstance(stats, (dict, list)):
        stats_df = pdTools.dict2df(stats)
    elif isinstance(stats, pd.DataFrame):
        stats_df = stats
    else:
        raise TypeError(
            'Input stats is must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.'
        )

    # Check windspeed length
    n_cases = len(stats_df)
    if len(windspeeds) == n_cases:
        ws = windspeeds
    elif int(len(windspeeds) / len(stats_df.columns.levels[0])) == n_cases:
        ws = windspeeds[0:n_cases]
        print(
            'WARNING: Assuming the input windspeed array is duplicated for each dataset.'
        )
    else:
        raise ValueError(
            'Length of windspeeds is not the correct length for the input statistics.'
        )

    column_levels = stats_df.columns.levels
    ws_keys = np.asarray(ws)

    # load power array
    if 'GenPwr' in column_levels[0]:
        pwr_array = stats_df.loc[:, ('GenPwr', 'mean')].to_frame()
    elif 'GenPwr' in column_levels[1]:
        pwr_array = stats_df.loc[:, (slice(None), 'GenPwr', 'mean')]
    else:
        raise ValueError(
            "('GenPwr','Mean') does not exist in the input statistics.")

    # group and average powers by wind speeds; grouping by an array avoids
    # writing a helper column into a slice of the caller's dataframe
    pwr_array = pwr_array.groupby(ws_keys).mean()

    # sorted unique wind speeds: the grouped rows are sorted by wind speed, and
    # both the integration and the interpolation below need increasing values
    ws_set = sorted(set(ws))

    # wind probability
    wind_prob = self.prob_WindDist(ws_set, disttype='pdf')

    # Calculate AEP. np.trapz was renamed np.trapezoid in NumPy 2.0, so use
    # whichever one this NumPy provides. 8760 = 24 * 365.
    integrate = getattr(np, 'trapezoid', None) or np.trapz
    aep = integrate(pwr_array.T * wind_prob, ws_set) * 8760

    if len(pwr_curve_vars) == 0:
        return aep

    # return power curves
    interpolate = len(U_pwr_curve) > 0
    performance_curves = {'U': U_pwr_curve if interpolate else ws_set}

    for var in pwr_curve_vars:
        # get data
        if var in column_levels[0]:
            perf_array = stats_df.loc[:, (var, 'mean')].to_frame()
            multiple_datasets = False
        elif var in column_levels[1]:
            perf_array = stats_df.loc[:, (slice(None), var, 'mean')]
            multiple_datasets = True
        else:
            raise ValueError(
                "(%s,'Mean') does not exist in the input statistics." % var)

        # average by wind speed
        perf_array = perf_array.groupby(ws_keys).mean().rename_axis('windspeeds')

        if multiple_datasets:
            # the channel is the second column level here
            curve = perf_array.xs(var, axis=1, level=1)
        else:
            curve = perf_array[var]

        if interpolate:
            spline = PchipInterpolator(ws_set, np.asarray(curve))
            values = spline(performance_curves['U'])
            if values.ndim == 1 or values.shape[1] == 1:
                values = values.flatten()
            performance_curves[var] = values
        else:
            performance_curves[var] = curve

    return aep, performance_curves
fp.t0 = 0 fp.parallel_analysis = False fp.results_dir = os.path.join(output_dir, 'stats') fp.verbose = True if save_results: fp.save_LoadRanking = True fp.save_SummaryStats = True # Load and save statistics and load rankings stats, load_rankings = fp.batch_processing() # Get wind speeds for processed runs windspeeds, seed, IECtype, cm_wind = Processing.get_windspeeds( cm, return_df=True) stats_df = pdTools.dict2df(stats) print('here') # # Get AEP # pp = Analysis.Power_Production() # Vavg = 10 # Average wind speed of cite # Vrange = [2,26] # Range of wind speeds being considered # # bnums = int(len(set(windspeeds))/len(fp.namebase)) # Number of wind speeds per dataset for binning data # bnums = len(fp.OpenFAST_outfile_list) # pp.windspeeds = list(set(windspeeds)) # p = pp.gen_windPDF(Vavg, bnums, Vrange) # AEP = pp.AEP(stats) # print('AEP = {}'.format(AEP)) # # Plot some spectral cases