def stat_curve(self, windspeeds, stats, plotvar, plottype, stat_idx=0, names=None):
    '''
    Plot the turbulent power curve for a set of data.
    Can be plotted as bar (good for comparing multiple cases) or line

    Parameters:
    -------
    windspeeds: list-like
        List of wind speeds to plot
    stats: list, dict, or pd.DataFrame
        Dict (single case), list(multiple cases), df(single or multiple cases)
        containing summary statistics.
    plotvar: str
        Type of variable to plot
    plottype: str
        bar or line
    stat_idx: int, optional
        Index of datasets in stats to plot from (used by the line plot only)
    names: list of str, optional
        Legend entries for the bar plot; names[0] prefixes the line plot title.

    Returns:
    --------
    fig: figure handle
    ax: axes handle

    Raises:
    -------
    TypeError: stats is not a dict, list or pd.DataFrame
    ValueError: plottype is not 'bar' or 'line', the windspeeds length does not
        match the statistics, or plotvar is not found in the statistics
    '''
    names = [] if names is None else names

    # Fail early instead of reaching `return fig, ax` with nothing plotted
    if plottype not in ('bar', 'line'):
        raise ValueError(
            "plottype must be 'bar' or 'line', not '{}'.".format(plottype))

    # Check for valid inputs
    if isinstance(stats, dict):
        stats_df = pdTools.dict2df(stats)
        # A single dictionary holds one dataset, so only index 0 makes sense
        if isinstance(stat_idx, list) or stat_idx > 0:
            print(
                'WARNING: stat_idx = {} is invalid for a single stats dictionary. Defaulting to stat_idx=0.'
                .format(stat_idx))
            stat_idx = 0
    elif isinstance(stats, list):
        stats_df = pdTools.dict2df(stats)
    elif isinstance(stats, pd.DataFrame):
        stats_df = stats
    else:
        raise TypeError(
            'Input stats must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.'
        )

    # Check windspeed length
    if len(windspeeds) == len(stats_df):
        ws = windspeeds
    elif int(len(windspeeds) / len(stats_df.columns.levels[0])) == len(stats_df):
        # windspeeds looks repeated once per first-level column entry; keep one copy
        ws = windspeeds[0:len(stats_df)]
    else:
        raise ValueError(
            'Length of windspeeds is not the correct length for the input statistics'
        )

    # Get statistical data for desired plot variable
    # NOTE(review): the plotting code below indexes two column levels
    # (dataset, statistic); confirm that a frame carrying plotvar on its first
    # column level still provides both after the droplevel.
    if plotvar in stats_df.columns.levels[0]:
        sdf = stats_df.loc[:, (plotvar, slice(None))].droplevel([0], axis=1)
    elif plotvar in stats_df.columns.levels[1]:
        sdf = stats_df.loc[:, (slice(None), plotvar, slice(None))].droplevel([1], axis=1)
    else:
        raise ValueError(
            "'{}' does not exist in the input statistics.".format(plotvar))

    # Add windspeeds to data
    sdf['WindSpeeds'] = ws
    # Group by windspeed and average each statistic (for multiple seeds)
    sdf = sdf.groupby('WindSpeeds').mean()
    # Final wind speed values
    pl_windspeeds = sdf.index.values

    fig, ax = plt.subplots()
    if plottype == 'bar':
        # Define mean and std dataframes
        means = sdf.loc[:, (slice(None), 'mean')].droplevel(1, axis=1)
        std = sdf.loc[:, (slice(None), 'std')].droplevel(1, axis=1)
        # Plot bar charts
        means.plot.bar(yerr=std, ax=ax, title=plotvar, capsize=2)
        ax.legend(names, loc='upper left')
    else:
        # Define mean, min, max, and std series for the selected dataset
        dataset = sdf.columns.levels[0][stat_idx]
        means = sdf.loc[:, (dataset, 'mean')]
        smax = sdf.loc[:, (dataset, 'max')]
        smin = sdf.loc[:, (dataset, 'min')]
        std = sdf.loc[:, (dataset, 'std')]
        # Gray whiskers span min..max; the thick line carries +/- std bars
        ax.errorbar(pl_windspeeds, means, [means - smin, smax - means],
                    fmt='k', ecolor='gray', lw=1, capsize=2)
        # names[0] is used whatever stat_idx is; without names, title is plotvar
        title = names[0] + ' - ' + plotvar if len(names) > 0 else plotvar
        means.plot(yerr=std, ax=ax, capsize=2, lw=3, elinewidth=2, title=title)
        plt.grid(lw=0.5, linestyle='--')

    return fig, ax
def AEP(self, stats, windspeeds):
    '''
    Get AEPs for simulation cases

    TODO: Print/Save this someplace besides the console

    Parameters:
    ----------
    stats: dict, list, pd.DataFrame
        Dict (single case), list(multiple cases), df(single or multiple cases)
        containing summary statistics.
    windspeeds: list-like
        List of wind speed values corresponding to each power output in the stats
        input for a single dataset

    Returns:
    --------
    AEP: np.ndarray
        One value per power column: the mean 'GenPwr' weighted by the wind
        speed pdf from self.prob_WindDist, integrated over wind speed and
        multiplied by 8760.

    Raises:
    -------
    TypeError: stats is not a dict, list or pd.DataFrame
    ValueError: windspeeds has the wrong length, or ('GenPwr', 'mean') is missing
    '''
    # Make sure stats is in pandas df
    if isinstance(stats, (dict, list)):
        stats_df = pdTools.dict2df(stats)
    elif isinstance(stats, pd.DataFrame):
        stats_df = stats
    else:
        raise TypeError(
            'Input stats is must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.'
        )

    # Check windspeed length
    if len(windspeeds) == len(stats_df):
        ws = windspeeds
    elif int(len(windspeeds) / len(stats_df.columns.levels[0])) == len(stats_df):
        ws = windspeeds[0:len(stats_df)]
        print(
            'WARNING: Assuming the input windspeed array is duplicated for each dataset.'
        )
    else:
        raise ValueError(
            'Length of windspeeds is not the correct length for the input statistics.'
        )

    # Load mean power as a DataFrame in both layouts, so it can be grouped
    if 'GenPwr' in stats_df.columns.levels[0]:
        pwr = pd.DataFrame(stats_df.loc[:, ('GenPwr', 'mean')])
    elif 'GenPwr' in stats_df.columns.levels[1]:
        pwr = stats_df.loc[:, (slice(None), 'GenPwr', 'mean')]
    else:
        raise ValueError(
            "('GenPwr','Mean') does not exist in the input statistics.")

    # Group and average powers by wind speed (e.g. several seeds per speed).
    # Grouping on the array itself leaves the caller's frame untouched.
    pwr = pwr.groupby(np.asarray(ws)).mean()

    # The grouped index is the sorted set of wind speeds. Using it keeps the
    # power rows, the pdf and the integration abscissa aligned.
    ws_set = pwr.index.tolist()

    # wind probability
    wind_prob = self.prob_WindDist(ws_set, disttype='pdf')

    # np.trapz was renamed np.trapezoid in NumPy 2; use whichever exists
    integrate = getattr(np, 'trapezoid', None) or np.trapz

    # Calculate AEP: one integral per power column, times 8760
    aep = integrate(pwr.to_numpy().T * np.asarray(wind_prob), ws_set) * 8760
    return aep
def design_comparison(self, filenames):
    '''
    Compare design runs

    Parameters:
    ----------
    filenames: list
        list of lists, where the inner lists are of equal length.

    Returns:
    --------
    stats: dict
        dictionary of summary statistics data
        (the serial path returns a list with one entry per inner list)
    load_rankings: dict
        dictionary of load rankings
        (the serial path returns a list with one entry per inner list)

    Raises:
    -------
    ValueError: filenames is empty or the inner lists differ in length
    '''
    if len(filenames) == 0:
        raise ValueError(
            'No filenames were provided for the design comparison.')

    # Make sure datasets are the same length
    ds_len = len(filenames[0])
    if any(len(dataset) != ds_len for dataset in filenames):
        raise ValueError(
            'The datasets for filenames corresponding to the design comparison should all be the same size.'
        )

    # Setup FAST_Analysis preferences
    loads_analysis = Analysis.Loads_Analysis()
    loads_analysis.verbose = self.verbose
    loads_analysis.t0 = self.t0
    loads_analysis.tf = self.tf
    loads_analysis.ranking_vars = self.ranking_vars
    loads_analysis.ranking_stats = self.ranking_stats

    if self.parallel_analysis:  # run analysis in parallel
        # Transpose so each work item holds the i-th file of every dataset
        fnames = np.array(filenames).T.tolist()

        pool = mp.Pool(self.parallel_cores)
        try:
            stats_separate = pool.map(
                partial(loads_analysis.full_loads_analysis,
                        get_load_ranking=False), fnames)
        finally:
            # Always release the worker processes, even if the analysis fails
            pool.close()
            pool.join()

        # Re-sort into the more "standard" dictionary/dataframe format we like
        unstacked = [pdTools.dict2df(ss).unstack() for ss in stats_separate]
        dft = pd.DataFrame(unstacked)
        dft = dft.reorder_levels([2, 0, 1], axis=1).sort_index(axis=1, level=0)
        stats = pdTools.df2dict(dft)

        # Get load rankings after stats are loaded
        load_rankings = loads_analysis.load_ranking(stats)

    else:  # run analysis in serial
        stats = []
        load_rankings = []
        for file_sets in filenames:
            st, lr = loads_analysis.full_loads_analysis(
                file_sets, get_load_ranking=True)
            stats.append(st)
            load_rankings.append(lr)

    return stats, load_rankings
def load_ranking(self, stats, names=None, get_df=False):
    '''
    Find load rankings for desired signals

    The signals and statistics to rank are read from self.ranking_vars and
    self.ranking_stats, which are paired element by element.

    Parameters:
    -------
    stats: dict, list, pd.DataFrame
        summary statistic information
    names: list of strings, optional
        names corresponding to each dataset
    get_df: bool, optional
        Return pd.DataFrame of data?

    Attributes read from self:
    -------
    ranking_stats: list
        desired statistics to rank for load ranking (e.g. ['max', 'std'])
    ranking_vars: list
        desired variables to for load ranking
        (e.g. ['GenTq', ['RootMyb1', 'RootMyb2', 'RootMyb3']])
    verbose: bool
        print a progress message

    Returns:
    -------
    load_ranking: dict
        dictionary containing load rankings
    load_ranking_df: pd.DataFrame
        pandas DataFrame containing load rankings (only when get_df is True)

    Raises:
    -------
    TypeError: stats is not a dict, list or pd.DataFrame
    ValueError: a ranking statistic is not one of max, abs, min, mean, std,
        or none of the requested columns exist in the statistics
    '''
    names = [] if names is None else names

    # Make sure stats is in pandas df
    if isinstance(stats, dict):
        stats_df = pdTools.dict2df([stats], names=names)
    elif isinstance(stats, list):
        stats_df = pdTools.dict2df(stats, names=names)
    elif isinstance(stats, pd.DataFrame):
        stats_df = stats
    else:
        raise TypeError(
            'Input stats is must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.'
        )

    # Ensure naming consistency
    if not names:
        names = list(stats_df.columns.levels[0])

    if self.verbose:
        print('Calculating load rankings.')

    available = set(stats_df.columns)

    # Collect load rankings
    collected_rankings = []
    for var, stat in zip(self.ranking_vars, self.ranking_stats):
        # A bare string is one variable, not a sequence of characters
        var_list = [var] if isinstance(var, str) else list(var)

        # Column names to search in stats_df
        #  - [name, variable, stat], i.e.['DLC1.1','TwrBsFxt','max']
        col = pd.MultiIndex.from_product([names, var_list, [stat]])

        # Label for the ranked variable: a group of several variables is
        # labelled by its first sorted member minus the last character
        # (e.g. 'RootMyb1' -> 'RootMyb').
        if len(col.levels[1]) > 1:
            mi_var = [col.levels[1][0][:-1]]
        else:
            mi_var = list(col.levels[1])
        mi_idx = stat + '_case_idx'

        # Check for valid stats
        missing = [c for c in col if c not in available]
        for c in missing:
            print('WARNING: {} does not exist in statistics.'.format(c))
        if missing:
            col = col.drop(missing)

        # Go to next case if no [stat, var] exists in this set
        if len(col) == 0:
            continue

        # Pick how to combine the variables of a group, and the rank order
        if stat in ('max', 'abs'):
            reducer, rank_ascending = 'max', False
        elif stat == 'min':
            reducer, rank_ascending = 'min', True
        elif stat in ('mean', 'std'):
            reducer, rank_ascending = 'mean', False
        else:
            raise ValueError(
                "Ranking statistic '{}' is not supported; use one of max, abs, min, mean, std."
                .format(stat))

        # Combine the selected columns per dataset name. Grouping the
        # transposed frame replaces the removed `level=` reduction argument.
        grouped = stats_df[col].T.groupby(level=0)
        var_df = getattr(grouped, reducer)().T

        # Rank each dataset: reset_index keeps the case index next to the value
        ranked = [
            var_df[column].sort_values(ascending=rank_ascending).reset_index()
            for column in var_df.columns
        ]
        single_lr = pd.concat(ranked, axis=1)
        # Label from the datasets actually present so labels match the data
        single_lr.columns = pd.MultiIndex.from_product(
            [list(var_df.columns), mi_var, [mi_idx, stat]])

        collected_rankings.append(single_lr)

    if not collected_rankings:
        raise ValueError(
            'None of the requested ranking variables exist in the input statistics.'
        )

    # Combine dataframes for each case
    load_ranking_df = pd.concat(collected_rankings, axis=1).sort_index(axis=1)
    # Generate dict of info
    load_ranking = pdTools.df2dict(load_ranking_df)

    if get_df:
        return load_ranking, load_ranking_df
    else:
        return load_ranking
def batch_processing(self):
    '''
    Run a full batch processing case!

    Checks that the files in self.OpenFAST_outfile_list exist (missing ones
    are reported and dropped), builds self.namebase for output naming, runs
    the loads analysis (as a design comparison when several datasets are
    given, otherwise serial or parallel per self.parallel_analysis), and
    optionally saves the summary statistics and load rankings as yaml.

    Returns:
    --------
    stats: dict or list
        summary statistics
    load_rankings: dict or list
        load rankings

    Raises:
    -------
    ValueError: no output files are defined, or none of them exist
    '''
    # ------------------ Input consistency checks ------------------ #
    # Do we have a list of data?
    N = len(self.OpenFAST_outfile_list)
    if N == 0:
        raise ValueError(
            'Output files not defined! Populate: "FastPost.OpenFAST_outfile_list". \n Quitting FAST_Processing.'
        )

    # Do all the files exist? Build filtered lists rather than removing items
    # while iterating, which skips the element after each removal.
    kept = []
    for i, flist in enumerate(self.OpenFAST_outfile_list):
        if isinstance(flist, str):
            if not os.path.exists(flist):
                print('Warning! File "{}" does not exist.'.format(flist))
                continue
        elif isinstance(flist, list):
            missing = [fname for fname in flist if not os.path.exists(fname)]
            for fname in missing:
                if len(self.dataset_names) > 0:
                    print('Warning! File "{}" from {} does not exist.'.format(
                        fname, self.dataset_names[i]))
                else:
                    print(
                        'Warning! File "{}" from dataset {} of {} does not exist.'
                        .format(fname, i + 1, N))
            if missing:
                gone = set(missing)
                flist[:] = [fname for fname in flist if fname not in gone]
        kept.append(flist)
    # Update in place, as before, so other references see the filtered list
    self.OpenFAST_outfile_list[:] = kept

    remaining = sum(
        len(entry) if isinstance(entry, list) else 1 for entry in kept)
    if remaining == 0:
        raise ValueError(
            'None of the files in "FastPost.OpenFAST_outfile_list" exist. \n Quitting FAST_Processing.'
        )

    # TODO: load the case matrix (self.fname_case_matrix) to provide
    # descriptive case names; this was only ever sketched as commented code.

    # get unique file namebase for datasets
    self.namebase = []
    if len(self.dataset_names) > 0:
        # use filename safe version of dataset names
        self.namebase = [
            "".join(
                c for c in name
                if c.isalpha() or c.isdigit() or c in ['_', '-']).rstrip()
            for _, name in zip(range(N), self.dataset_names)
        ]
    elif len(self.OpenFAST_outfile_list) > 0:
        # use out file naming: file name minus its last '_'-separated token
        if isinstance(self.OpenFAST_outfile_list[0], list):
            self.namebase = [
                '_'.join(os.path.split(flist[0])[1].split('_')[:-1])
                for flist in self.OpenFAST_outfile_list
            ]
        else:
            self.namebase = [
                '_'.join(os.path.split(flist)[1].split('_')[:-1])
                for flist in self.OpenFAST_outfile_list
            ]

    # check that names are unique
    if not len(self.namebase) == len(set(self.namebase)):
        self.namebase = []

    # as last resort, give generic name
    if not self.namebase:
        if isinstance(self.OpenFAST_outfile_list[0], str):
            # Just one dataset name for single dataset
            self.namebase = ['dataset1']
        else:
            self.namebase = [
                'dataset' + ('{}'.format(i)).zfill(len(str(N - 1)))
                for i in range(N)
            ]

    # Run design comparison if filenames list has multiple lists
    if (len(self.OpenFAST_outfile_list) > 1) and isinstance(
            self.OpenFAST_outfile_list[0], list):
        # Load stats and load rankings for design comparisons
        stats, load_rankings = self.design_comparison(
            self.OpenFAST_outfile_list)
    else:
        # Initialize Analysis (shared by the parallel and serial paths)
        loads_analysis = Analysis.Loads_Analysis()
        loads_analysis.verbose = self.verbose
        loads_analysis.t0 = self.t0
        loads_analysis.tf = self.tf
        # Forward the ranking preferences, as design_comparison does
        loads_analysis.ranking_vars = self.ranking_vars
        loads_analysis.ranking_stats = self.ranking_stats

        if self.parallel_analysis:  # run analysis in parallel
            analyze = partial(loads_analysis.full_loads_analysis,
                              get_load_ranking=False)
            pool = mp.Pool(self.parallel_cores)
            try:
                try:
                    stats_separate = pool.map(analyze,
                                              self.OpenFAST_outfile_list)
                except Exception:
                    # Fall back to mapping over the first entry's contents
                    # NOTE(review): presumably for a single nested dataset
                    # ([[file, ...]]) - confirm.
                    stats_separate = pool.map(
                        analyze, self.OpenFAST_outfile_list[0])
            finally:
                # Always release the worker processes
                pool.close()
                pool.join()

            # Re-sort into the more "standard" dictionary/dataframe format we like
            unstacked = [
                pdTools.dict2df(ss).unstack() for ss in stats_separate
            ]
            dft = pd.DataFrame(unstacked)
            dft = dft.reorder_levels([2, 0, 1], axis=1).sort_index(axis=1,
                                                                   level=0)
            stats = pdTools.df2dict(dft)

            # Get load rankings after stats are loaded
            load_rankings = loads_analysis.load_ranking(
                stats, names=self.dataset_names, get_df=False)
        else:  # run analysis in serial
            stats, load_rankings = loads_analysis.full_loads_analysis(
                self.OpenFAST_outfile_list, get_load_ranking=True)

    def _save_results(results, suffix):
        # One yaml for a single dict, otherwise one per (namebase, entry) pair
        if isinstance(results, dict):
            named = [(self.namebase[0], results)]
        else:
            named = zip(self.namebase, results)
        for namebase, data in named:
            fname = namebase + suffix
            if self.verbose:
                print('Saving {}'.format(fname))
            save_yaml(self.results_dir, fname, data)

    if self.save_SummaryStats:
        _save_results(stats, '_stats.yaml')

    if self.save_LoadRanking:
        _save_results(load_rankings, '_LoadRanking.yaml')

    return stats, load_rankings