def design_comparison(self, filenames):
    '''
    Compare design runs

    Runs the loads analysis on several datasets so that they can be compared
    against each other. The analysis runs in a multiprocessing pool when
    self.parallel_analysis is set, and in a simple loop otherwise.

    Parameters:
    ----------
    filenames: list
        list of lists, where the inner lists are of equal length.

    Returns:
    --------
    stats: dict or list
        summary statistics data. A single dictionary (from pdTools.df2dict)
        in the parallel case; a list with one entry per inner list of
        filenames in the serial case.
    load_rankings: dict or list
        load rankings. The output of Loads_Analysis.load_ranking in the
        parallel case; a list with one entry per inner list of filenames in
        the serial case.

    Raises:
    -------
    ValueError
        if the inner lists of filenames do not all have the same length.
    '''
    # Make sure datasets are the same length
    ds_len = len(filenames[0])
    if any(len(dataset) != ds_len for dataset in filenames):
        raise ValueError(
            'The datasets for filenames corresponding to the design comparison should all be the same size.'
        )

    # Setup FAST_Analysis preferences
    loads_analysis = Analysis.Loads_Analysis()
    loads_analysis.verbose = self.verbose
    loads_analysis.t0 = self.t0
    loads_analysis.tf = self.tf
    loads_analysis.ranking_vars = self.ranking_vars
    loads_analysis.ranking_stats = self.ranking_stats
    loads_analysis.DEL_info = self.DEL_info

    if self.parallel_analysis:
        # run analysis in parallel
        # Transpose so that each work item groups the i-th file of every
        # dataset. Only the parallel path consumes this layout.
        fnames = np.array(filenames).T.tolist()

        pool = mp.Pool(self.parallel_cores)
        try:
            stats_separate = pool.map(
                partial(loads_analysis.full_loads_analysis,
                        get_load_ranking=False), fnames)
        finally:
            # Always release the worker processes, even if a task raised
            pool.close()
            pool.join()

        # Re-sort into the more "standard" dictionary/dataframe format we like
        stats = [pdTools.dict2df(ss).unstack() for ss in stats_separate]
        dft = pd.DataFrame(stats)
        dft = dft.reorder_levels([2, 0, 1], axis=1).sort_index(axis=1, level=0)
        stats = pdTools.df2dict(dft)

        # Get load rankings after stats are loaded
        load_rankings = loads_analysis.load_ranking(
            stats, names=self.dataset_names)

    else:
        # run analysis in serial, one dataset (inner list) at a time
        stats = []
        load_rankings = []
        for file_sets in filenames:
            st, lr = loads_analysis.full_loads_analysis(
                file_sets, get_load_ranking=True, names=self.dataset_names)
            stats.append(st)
            load_rankings.append(lr)

    return stats, load_rankings
def load_ranking(self, stats, names=None, get_df=False):
    '''
    Find load rankings for desired signals

    The variables and statistics to rank are read from self.ranking_vars and
    self.ranking_stats, which are paired element by element
    (e.g. ranking_vars = [['GenTq'], ['RootMyb1', 'RootMyb2', 'RootMyb3']]
    with ranking_stats = ['max', 'std']). A bare string in ranking_vars is
    treated as a single variable name.

    For every (variables, statistic) pair the values of the listed variables
    are combined per dataset (largest value for 'max' and 'abs', smallest
    value for 'min', average for 'mean' and 'std') and then sorted, ascending
    for 'min' and descending otherwise.

    Parameters:
    -------
    stats: dict, list, pd.DataFrame
        summary statistic information
    names: list of strings, optional
        names corresponding to each dataset. When empty, the first column
        level of the statistics dataframe is used.
    get_df: bool, optional
        Return pd.DataFrame of data?

    Returns:
    -------
    load_ranking: dict
        dictionary containing load rankings
    load_ranking_df: pd.DataFrame
        pandas DataFrame containing load rankings (only when get_df is True)

    Raises:
    -------
    TypeError
        if stats is not a dictionary, list or pd.DataFrame.
    '''
    # Avoid a shared mutable default while still handing a list to pdTools
    names = [] if names is None else names

    # Make sure stats is in pandas df
    if isinstance(stats, dict):
        stats_df = pdTools.dict2df([stats], names=names)
    elif isinstance(stats, list):
        stats_df = pdTools.dict2df(stats, names=names)
    elif isinstance(stats, pd.DataFrame):
        stats_df = stats
    else:
        raise TypeError(
            'Input stats must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.'
        )

    # Ensure naming consistency
    if not names:
        names = list(stats_df.columns.levels[0])

    if self.verbose:
        print('Calculating load rankings.')

    # statistic -> (aggregation across the variables, sort ascending?)
    reducers = {
        'max': ('max', False),
        'abs': ('max', False),
        'min': ('min', True),
        'mean': ('mean', False),
        'std': ('mean', False),
    }

    def _reduce_by_dataset(frame, how):
        # Aggregate over all columns sharing the same dataset name (column
        # level 0). Done through a transpose + groupby because the
        # ``level=`` keyword of DataFrame.max/min/mean was removed in
        # pandas 2.0. sort=False keeps the datasets in order of appearance.
        grouped = frame.T.groupby(level=0, sort=False)
        return getattr(grouped, how)().T

    # Built once so the membership test below is O(1) per column
    available = set(stats_df.columns.values)

    # Collect load rankings
    collected_rankings = []
    for var, stat in zip(self.ranking_vars, self.ranking_stats):
        var_list = [var] if isinstance(var, str) else list(var)

        # Column names to search in stats_df
        #  - [name, variable, stat], i.e.['DLC1.1','TwrBsFxt','max']
        col = pd.MultiIndex.from_product([names, var_list, [stat]])

        # Label used for the ranked variable: a group of variables
        # (e.g. RootMyb1, RootMyb2, ...) is reported under the first name
        # with its last character dropped.
        if len(col.levels[1]) > 1:
            mi_var = [col.levels[1][0][:-1]]
        else:
            mi_var = list(col.levels[1])
        mi_idx = stat + '_case_idx'

        # Check for valid stats
        missing = [c for c in col if c not in available]
        for c in missing:
            print('WARNING: {} does not exist in statistics.'.format(c))
        if missing:
            col = col.drop(missing)

        # Go to next case if no [stat, var] exists in this set
        if len(col) == 0:
            continue

        # An unknown statistic has no aggregation rule; skip it rather than
        # reuse results left over from a previous iteration.
        if stat not in reducers:
            print('WARNING: load ranking is not supported for statistic "{}"; skipping.'.format(stat))
            continue
        how, rank_ascending = reducers[stat]

        # Extract desired variables from stats dataframe
        var_df = _reduce_by_dataset(stats_df[col], how)

        # Combine ranking dataframes for each dataset. reset_index() turns
        # the sorted case index into its own column next to the values.
        var_df_list = [
            var_df[column].sort_values(ascending=rank_ascending).reset_index()
            for column in var_df.columns
        ]
        single_lr = pd.concat(var_df_list, axis=1)

        # Label from the datasets actually present, in the same order as the
        # concatenated data, so labels can never be attached to the wrong set.
        single_lr.columns = pd.MultiIndex.from_product(
            [list(var_df.columns), mi_var, [mi_idx, stat]])

        collected_rankings.append(single_lr)

    # Combine dataframes for each case
    # NOTE: pd.concat raises ValueError if nothing could be ranked.
    load_ranking_df = pd.concat(collected_rankings, axis=1).sort_index(axis=1)

    # Generate dict of info
    ranking_dict = pdTools.df2dict(load_ranking_df)

    if get_df:
        return ranking_dict, load_ranking_df
    return ranking_dict
def batch_processing(self):
    '''
    Run a full batch processing case!

    Steps:
      1. Check self.OpenFAST_outfile_list and drop entries whose file does
         not exist (a warning is printed for each one).
      2. Build self.namebase, the file name prefix used for each dataset when
         saving results.
      3. Run the loads analysis: a design comparison when several datasets
         (a list of lists) are given, otherwise a single analysis, in
         parallel or in serial depending on self.parallel_analysis.
      4. Optionally save summary statistics and load rankings as yaml files
         in self.results_dir.

    Returns:
    --------
    stats: dict or list
        summary statistics
    load_rankings: dict or list
        load rankings

    Raises:
    -------
    ValueError
        if self.OpenFAST_outfile_list is empty, or if no existing file is
        left for a dataset once missing files have been dropped.
    '''
    # ------------------ Input consistency checks ------------------ #
    # Do we have a list of data?
    N = len(self.OpenFAST_outfile_list)
    if N == 0:
        raise ValueError(
            'Output files not defined! Populate: "FastPost.OpenFAST_outfile_list". \n Quitting FAST_Processing.'
        )

    # Do all the files exist?
    # Build filtered copies instead of calling remove() while iterating,
    # which would skip the element following every removed one.
    kept = []
    for i, flist in enumerate(self.OpenFAST_outfile_list):
        if isinstance(flist, str):
            if not os.path.exists(flist):
                print('Warning! File "{}" does not exist.'.format(flist))
                continue
        elif isinstance(flist, list):
            existing = []
            for fname in flist:
                if os.path.exists(fname):
                    existing.append(fname)
                elif len(self.dataset_names) > 0:
                    print('Warning! File "{}" from {} does not exist.'.
                          format(fname, self.dataset_names[i]))
                else:
                    print(
                        'Warning! File "{}" from dataset {} of {} does not exist.'
                        .format(fname, i + 1, N))
            if len(existing) != len(flist):
                # in-place so other references to this list see the change
                flist[:] = existing
        kept.append(flist)
    if len(kept) != len(self.OpenFAST_outfile_list):
        self.OpenFAST_outfile_list[:] = kept

    # Nothing left to process for at least one dataset
    if not self.OpenFAST_outfile_list or any(
            isinstance(flist, list) and not flist
            for flist in self.OpenFAST_outfile_list):
        raise ValueError(
            'No existing output files left to process in "FastPost.OpenFAST_outfile_list". \n Quitting FAST_Processing.'
        )

    # TODO: descriptive case naming from a case matrix file is not implemented.

    # get unique file namebase for datasets
    self.namebase = []
    if len(self.dataset_names) > 0:
        # use filename safe version of dataset names
        self.namebase = [
            "".join(c for c in name
                    if c.isalpha() or c.isdigit() or c in ['_', '-']).rstrip()
            for _, name in zip(range(N), self.dataset_names)
        ]
    elif len(self.OpenFAST_outfile_list) > 0:
        # use out file naming: file name without its last '_'-separated part
        if isinstance(self.OpenFAST_outfile_list[0], list):
            self.namebase = [
                '_'.join(os.path.split(flist[0])[1].split('_')[:-1])
                for flist in self.OpenFAST_outfile_list
            ]
        else:
            self.namebase = [
                '_'.join(os.path.split(flist)[1].split('_')[:-1])
                for flist in self.OpenFAST_outfile_list
            ]

    # check that names are unique
    if len(self.namebase) != len(set(self.namebase)):
        self.namebase = []

    # as last resort, give generic name
    if not self.namebase:
        if isinstance(self.OpenFAST_outfile_list[0], str):
            # Just one dataset name for single dataset
            self.namebase = ['dataset1']
        else:
            self.namebase = [
                'dataset' + ('{}'.format(i)).zfill(len(str(N - 1)))
                for i in range(N)
            ]

    # Run design comparison if filenames list has multiple lists
    if (len(self.OpenFAST_outfile_list) > 1) and isinstance(
            self.OpenFAST_outfile_list[0], list):
        # Load stats and load rankings for design comparisons
        stats, load_rankings = self.design_comparison(
            self.OpenFAST_outfile_list)

    else:
        # Initialize Analysis (shared by the parallel and serial paths)
        loads_analysis = Analysis.Loads_Analysis()
        loads_analysis.verbose = self.verbose
        loads_analysis.t0 = self.t0
        loads_analysis.tf = self.tf
        loads_analysis.DEL_info = self.DEL_info
        loads_analysis.ranking_stats = self.ranking_stats
        loads_analysis.ranking_vars = self.ranking_vars

        if self.parallel_analysis:
            # run analysis in parallel
            run_one = partial(loads_analysis.full_loads_analysis,
                              get_load_ranking=False)
            pool = mp.Pool(self.parallel_cores)
            try:
                try:
                    stats_separate = pool.map(run_one,
                                              self.OpenFAST_outfile_list)
                except Exception:
                    # Fall back to mapping over the first entry of the list.
                    # Exception (not a bare except) so that KeyboardInterrupt
                    # and SystemExit still stop the run.
                    stats_separate = pool.map(
                        run_one, self.OpenFAST_outfile_list[0])
            finally:
                # Always release the worker processes
                pool.close()
                pool.join()

            # Re-sort into the more "standard" dictionary/dataframe format we like
            stats = [pdTools.dict2df(ss).unstack() for ss in stats_separate]
            dft = pd.DataFrame(stats)
            dft = dft.reorder_levels([2, 0, 1],
                                     axis=1).sort_index(axis=1, level=0)
            stats = pdTools.df2dict(dft)

            # Get load rankings after stats are loaded
            load_rankings = loads_analysis.load_ranking(
                stats, names=self.dataset_names, get_df=False)

        else:
            # run analysis in serial
            stats, load_rankings = loads_analysis.full_loads_analysis(
                self.OpenFAST_outfile_list, get_load_ranking=True)

    if self.save_SummaryStats:
        if isinstance(stats, dict):
            fname = self.namebase[0] + '_stats.yaml'
            if self.verbose:
                print('Saving {}'.format(fname))
            save_yaml(self.results_dir, fname, stats)
        else:
            for namebase, st in zip(self.namebase, stats):
                fname = namebase + '_stats.yaml'
                if self.verbose:
                    print('Saving {}'.format(fname))
                save_yaml(self.results_dir, fname, st)

    if self.save_LoadRanking:
        if isinstance(load_rankings, dict):
            fname = self.namebase[0] + '_LoadRanking.yaml'
            if self.verbose:
                print('Saving {}'.format(fname))
            save_yaml(self.results_dir, fname, load_rankings)
        else:
            for namebase, lr in zip(self.namebase, load_rankings):
                fname = namebase + '_LoadRanking.yaml'
                if self.verbose:
                    print('Saving {}'.format(fname))
                save_yaml(self.results_dir, fname, lr)

    return stats, load_rankings