Example #1
0
    def stat_curve(self,
                   windspeeds,
                   stats,
                   plotvar,
                   plottype,
                   stat_idx=0,
                   names=None):
        '''
        Plot the turbulent power curve for a set of data.
        Can be plotted as bar (good for comparing multiple cases) or line

        Parameters:
        -------
        windspeeds: list-like
            List of wind speeds to plot. Must either have one entry per row of
            the statistics, or be that list repeated once per dataset (in which
            case only the first repetition is used).
        stats: list, dict, or pd.DataFrame
            Dict (single case), list(multiple cases), df(single or multiple cases) containing
            summary statistics.
        plotvar: str
            Type of variable to plot
        plottype: str
            bar or line
        stat_idx: int, optional
            Index of datasets in stats to plot from (only used for line plots)
        names: list of strings, optional
            Names of the datasets. Used for the bar plot legend and, for line
            plots, names[0] is used as title prefix.

        Returns:
        --------
        fig: figure handle
        ax: axes handle

        Raises:
        -------
        TypeError
            If stats is not a dict, list or pd.DataFrame.
        ValueError
            If plottype is not 'bar' or 'line', if windspeeds has an
            inconsistent length, or if plotvar is not in the statistics.
        '''
        names = [] if names is None else list(names)

        # Reject unknown plot types up front; otherwise no figure would be
        # created and the final return would fail on unbound names.
        if plottype not in ('bar', 'line'):
            raise ValueError(
                "plottype must be 'bar' or 'line', got {!r}.".format(plottype))

        # Check for valid inputs
        if isinstance(stats, dict):
            stats_df = pdTools.dict2df(stats)
            # A single dictionary holds one dataset, so only index 0 is valid.
            # The list check comes first because `list > 0` is not defined.
            if isinstance(stat_idx, list) or stat_idx > 0:
                print(
                    'WARNING: stat_idx = {} is invalid for a single stats dictionary. Defaulting to stat_idx=0.'
                    .format(stat_idx))
                stat_idx = 0
        elif isinstance(stats, list):
            stats_df = pdTools.dict2df(stats)
        elif isinstance(stats, pd.DataFrame):
            stats_df = stats
        else:
            raise TypeError(
                'Input stats must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.'
            )

        # Check windspeed length
        n_cases = len(stats_df)
        if len(windspeeds) == n_cases:
            ws = windspeeds
        elif int(len(windspeeds) /
                 len(stats_df.columns.levels[0])) == n_cases:
            # Wind speeds appear to be repeated per dataset; keep the first set
            ws = windspeeds[0:n_cases]
        else:
            raise ValueError(
                'Length of windspeeds is not the correct length for the input statistics'
            )

        # Get statistical data for desired plot variable
        if plotvar in stats_df.columns.levels[0]:
            sdf = stats_df.loc[:, (plotvar, slice(None))].droplevel([0],
                                                                    axis=1)
        elif plotvar in stats_df.columns.levels[1]:
            sdf = stats_df.loc[:, (slice(None), plotvar,
                                   slice(None))].droplevel([1], axis=1)
        else:
            raise ValueError(
                "{!r} does not exist in the input statistics.".format(plotvar))

        # Add windspeeds to data
        sdf['WindSpeeds'] = ws
        # Group by windspeed and average each statistic (for multiple seeds)
        sdf = sdf.groupby('WindSpeeds').mean()
        # Final wind speed values
        pl_windspeeds = sdf.index.values

        if plottype == 'bar':
            # Define mean and std dataframes
            means = sdf.loc[:, (slice(None), 'mean')].droplevel(1, axis=1)
            std = sdf.loc[:, (slice(None), 'std')].droplevel(1, axis=1)
            # Plot bar charts
            fig, ax = plt.subplots()
            means.plot.bar(yerr=std, ax=ax, title=plotvar, capsize=2)
            if names:
                ax.legend(names, loc='upper left')
            else:
                # No names given: keep the labels generated from the data
                ax.legend(loc='upper left')
        else:  # plottype == 'line'
            # Define mean, min, max, and std dataframes
            dataset = sdf.columns.levels[0][stat_idx]
            means = sdf.loc[:, (dataset, 'mean')]
            smax = sdf.loc[:, (dataset, 'max')]
            smin = sdf.loc[:, (dataset, 'min')]
            std = sdf.loc[:, (dataset, 'std')]

            # Only prefix the title with a dataset name when one was supplied
            title = names[0] + ' - ' + plotvar if names else plotvar

            fig, ax = plt.subplots()
            # Thin gray bars: min/max envelope around the mean
            ax.errorbar(pl_windspeeds,
                        means, [means - smin, smax - means],
                        fmt='k',
                        ecolor='gray',
                        lw=1,
                        capsize=2)
            # Thick line with bars: mean +/- standard deviation
            means.plot(yerr=std,
                       ax=ax,
                       capsize=2,
                       lw=3,
                       elinewidth=2,
                       title=title)
            ax.grid(lw=0.5, linestyle='--')

        return fig, ax
Example #2
0
    def AEP(self, stats, windspeeds):
        '''
        Get AEPs for simulation cases

        TODO: Print/Save this someplace besides the console

        Parameters:
        ----------
        stats: dict, list, pd.DataFrame
            Dict (single case), list(multiple cases), df(single or multiple cases) containing
            summary statistics.
        windspeeds: list-like
            List of wind speed values corresponding to each power output in the stats input
            for a single dataset

        Returns:
        --------
        AEP: np.ndarray
            Annual energy production, one value per dataset found in stats.
            Computed as the trapezoidal integral over wind speed of
            (mean GenPwr * wind probability density), multiplied by 8760.

        Raises:
        -------
        TypeError
            If stats is not a dict, list or pd.DataFrame.
        ValueError
            If windspeeds has an inconsistent length or the mean of GenPwr is
            not present in the statistics.
        '''

        # Make sure stats is in pandas df
        if isinstance(stats, (dict, list)):
            stats_df = pdTools.dict2df(stats)
        elif isinstance(stats, pd.DataFrame):
            stats_df = stats
        else:
            raise TypeError(
                'Input stats is must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.'
            )

        # Check windspeed length
        n_cases = len(stats_df)
        if len(windspeeds) == n_cases:
            ws = windspeeds
        elif int(len(windspeeds) /
                 len(stats_df.columns.levels[0])) == n_cases:
            ws = windspeeds[0:n_cases]
            print(
                'WARNING: Assuming the input windspeed array is duplicated for each dataset.'
            )
        else:
            raise ValueError(
                'Length of windspeeds is not the correct length for the input statistics.'
            )

        # load power array
        if 'GenPwr' in stats_df.columns.levels[0]:
            pwr = stats_df.loc[:, ('GenPwr', 'mean')]
        elif 'GenPwr' in stats_df.columns.levels[1]:
            pwr = stats_df.loc[:, (slice(None), 'GenPwr', 'mean')]
        else:
            raise ValueError(
                "('GenPwr','Mean') does not exist in the input statistics.")
        # Keep the data in a DataFrame (one column per dataset) so that it can
        # be grouped; a single-dataset selection comes back as a Series.
        pwr_df = pwr.to_frame() if isinstance(pwr, pd.Series) else pwr

        # group and average powers by wind speeds (e.g. multiple seeds).
        # Grouping by an external key array keeps the wind speeds out of the
        # data columns, and the resulting index is sorted and unique.
        pwr_by_ws = pwr_df.groupby(np.asarray(ws)).mean()
        # Use the grouped index as the set of wind speeds so that powers,
        # probabilities and the integration abscissa share the same ordering.
        ws_set = list(pwr_by_ws.index)
        # wind probability
        wind_prob = np.asarray(self.prob_WindDist(ws_set, disttype='pdf'))
        # Calculate AEP (np.trapz was renamed np.trapezoid in NumPy 2.0)
        integrate = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz
        aep = integrate(pwr_by_ws.values.T * wind_prob, ws_set) * 8760

        return aep
    def design_comparison(self, filenames):
        '''
        Compare design runs

        Parameters:
        ----------
        filenames: list
            list of lists, where the inner lists are of equal length.

        Returns:
        --------
        stats: dict
            dictionary of summary statistics data
            (a list of per-dataset results when run in serial)
        load_rankings: dict
            dictionary of load rankings
            (a list of per-dataset results when run in serial)

        Raises:
        -------
        ValueError
            If filenames is empty or the inner lists differ in length.
        '''

        # Make sure there is something to compare
        if len(filenames) == 0:
            raise ValueError(
                'No datasets were provided for the design comparison.')

        # Make sure datasets are the same length
        ds_len = len(filenames[0])
        if any(len(dataset) != ds_len for dataset in filenames):
            raise ValueError(
                'The datasets for filenames corresponding to the design comparison should all be the same size.'
            )

        # Transposed file list: entry i holds the i-th file of every dataset
        fnames = np.array(filenames).T.tolist()
        # Setup FAST_Analysis preferences
        loads_analysis = Analysis.Loads_Analysis()
        loads_analysis.verbose = self.verbose
        loads_analysis.t0 = self.t0
        loads_analysis.tf = self.tf
        loads_analysis.ranking_vars = self.ranking_vars
        loads_analysis.ranking_stats = self.ranking_stats

        if self.parallel_analysis:  # run analysis in parallel
            # run analysis; the context manager releases the worker
            # processes even if one of the analyses raises
            with mp.Pool(self.parallel_cores) as pool:
                stats_separate = pool.map(
                    partial(loads_analysis.full_loads_analysis,
                            get_load_ranking=False), fnames)

            # Re-sort into the more "standard" dictionary/dataframe format we like
            stats = [pdTools.dict2df(ss).unstack() for ss in stats_separate]
            dft = pd.DataFrame(stats)
            dft = dft.reorder_levels([2, 0, 1], axis=1).sort_index(axis=1,
                                                                   level=0)
            stats = pdTools.df2dict(dft)

            # Get load rankings after stats are loaded
            load_rankings = loads_analysis.load_ranking(stats)

        else:  # run analysis in serial
            # NOTE(review): the serial path analyses one dataset at a time
            # (untransposed `filenames`), whereas the parallel path maps over
            # the transposed `fnames` - confirm both are intended.
            stats = []
            load_rankings = []
            for file_sets in filenames:
                st, lr = loads_analysis.full_loads_analysis(
                    file_sets, get_load_ranking=True)
                stats.append(st)
                load_rankings.append(lr)

        return stats, load_rankings
Example #4
0
    def load_ranking(self, stats, names=None, get_df=False):
        '''
        Find load rankings for desired signals

        The signals and statistics to rank are read from self.ranking_vars and
        self.ranking_stats, which are paired element by element.

        Parameters:
        -------
        stats: dict, list, pd.DataFrame
            summary statistic information
        names: list of strings, optional
            names corresponding to each dataset
        get_df: bool, optional
            Return pd.DataFrame of data?

        Attributes used:
        -------
        self.ranking_stats: list
            desired statistics to rank for load ranking (e.g. ['max', 'std'])
        self.ranking_vars: list
            desired variables to for load ranking (e.g. ['GenTq', ['RootMyb1', 'RootMyb2', 'RootMyb3']])
            Each entry may be a single variable name or a list of names that
            are combined into one ranking.

        Returns:
        -------
        load_ranking: dict
            dictionary containing load rankings
        load_ranking_df: pd.DataFrame
            pandas DataFrame containing load rankings (only if get_df is True)

        Raises:
        -------
        TypeError
            If stats is not a dict, list or pd.DataFrame.
        ValueError
            If none of the requested variable/statistic pairs can be ranked.
        '''
        names = [] if names is None else list(names)

        # Make sure stats is in pandas df
        if isinstance(stats, dict):
            stats_df = pdTools.dict2df([stats], names=names)
        elif isinstance(stats, list):
            stats_df = pdTools.dict2df(stats, names=names)
        elif isinstance(stats, pd.DataFrame):
            stats_df = stats
        else:
            raise TypeError(
                'Input stats is must be a dictionary, list, or pd.DataFrame containing OpenFAST output statistics.'
            )

        # Ensure naming consitency
        if not names:
            names = list(stats_df.columns.levels[0])

        if self.verbose:
            print('Calculating load rankings.')

        # How each supported statistic is combined across variables, and
        # whether the ranking runs from smallest to largest.
        reducers = {
            'max': ('max', False),
            'abs': ('max', False),
            'min': ('min', True),
            'mean': ('mean', False),
            'std': ('mean', False),
        }

        # Columns available in the statistics, for fast membership tests
        available = set(stats_df.columns)

        # Collect load rankings
        collected_rankings = []
        for var, stat in zip(self.ranking_vars, self.ranking_stats):
            # A bare string is one variable, not a sequence of characters
            var_list = [var] if isinstance(var, str) else list(var)
            if not var_list:
                continue

            if stat not in reducers:
                print('WARNING: ranking statistic {} is not supported.'.format(
                    stat))
                continue
            reducer, rank_ascending = reducers[stat]

            # Label for this ranking: for a group of variables, the first
            # (sorted) name with its last character removed
            # (e.g. RootMyb1 -> RootMyb).
            unique_vars = sorted(set(var_list))
            if len(unique_vars) > 1:
                var_label = unique_vars[0][:-1]
            else:
                var_label = unique_vars[0]
            idx_label = stat + '_case_idx'

            # Column names to search in stats_df
            #  - [name, variable, stat],  i.e.['DLC1.1','TwrBsFxt','max']
            wanted = pd.MultiIndex.from_product([names, var_list, [stat]])

            # Check for valid stats
            present = []
            for c in wanted:
                if c in available:
                    present.append(c)
                else:
                    print(
                        'WARNING: {} does not exist in statistics.'.format(c))
            # Go to next case if no [stat, var] exists in this set
            if not present:
                continue

            # Extract desired variables from stats dataframe and combine them
            # per dataset (column level 0). Grouping the transposed frame is
            # the replacement for the `level=` argument of DataFrame
            # reductions, which newer pandas no longer accepts.
            selected = stats_df[pd.MultiIndex.from_tuples(present)]
            var_df = selected.T.groupby(level=0, sort=False).agg(reducer).T

            # Combine ranking dataframes for each dataset
            var_df_list = [
                var_df[column].sort_values(
                    ascending=rank_ascending).reset_index()
                for column in var_df.columns
            ]
            single_lr = pd.concat(var_df_list, axis=1)
            # Label from the datasets actually ranked so that the labels
            # always line up with the data columns: [case index, value]
            single_lr.columns = pd.MultiIndex.from_product(
                [list(var_df.columns), [var_label], [idx_label, stat]])
            collected_rankings.append(single_lr)

        if not collected_rankings:
            raise ValueError(
                'None of the requested ranking variables and statistics exist in the input statistics.'
            )

        # Combine dataframes for each case
        load_ranking_df = pd.concat(collected_rankings,
                                    axis=1).sort_index(axis=1)
        # Generate dict of info
        load_ranking_dict = pdTools.df2dict(load_ranking_df)

        if get_df:
            return load_ranking_dict, load_ranking_df
        return load_ranking_dict
    def batch_processing(self):
        '''
        Run a full batch processing case!

        Checks that the files in self.OpenFAST_outfile_list exist (missing
        files are reported and removed from the list in place), builds a file
        name base for each dataset in self.namebase, runs the loads analysis
        (design comparison for multiple datasets, otherwise a single analysis
        in parallel or serial) and optionally saves the results as yaml files
        in self.results_dir.

        Returns:
        --------
        stats: dict or list
            summary statistics from the analysis
        load_rankings: dict or list
            load rankings from the analysis

        Raises:
        -------
        ValueError
            If self.OpenFAST_outfile_list is empty, or if no existing files
            remain for a dataset after the missing ones are removed.
        '''
        # ------------------ Input consistancy checks ------------------ #
        # Do we have a list of data?
        N = len(self.OpenFAST_outfile_list)
        if N == 0:
            raise ValueError(
                'Output files not defined! Populate: "FastPost.OpenFAST_outfile_list". \n Quitting FAST_Processing.'
            )

        # Do all the files exist?
        # Missing entries are collected first and removed afterwards, because
        # removing from a list while iterating over it skips elements.
        missing_idx = set()
        for i, flist in enumerate(self.OpenFAST_outfile_list):
            if isinstance(flist, str):
                if not os.path.exists(flist):
                    print('Warning! File "{}" does not exist.'.format(flist))
                    missing_idx.add(i)
            elif isinstance(flist, list):
                existing = []
                for fname in flist:
                    if os.path.exists(fname):
                        existing.append(fname)
                    elif len(self.dataset_names) > 0:
                        print('Warning! File "{}" from {} does not exist.'.
                              format(fname, self.dataset_names[i]))
                    else:
                        print(
                            'Warning! File "{}" from dataset {} of {} does not exist.'
                            .format(fname, i + 1, N))
                # Slice assignment keeps the same list object, as before
                flist[:] = existing
        if missing_idx:
            self.OpenFAST_outfile_list[:] = [
                entry for i, entry in enumerate(self.OpenFAST_outfile_list)
                if i not in missing_idx
            ]

        # Nothing can be analysed if a dataset has no existing files left
        if not self.OpenFAST_outfile_list or any(
                isinstance(entry, list) and not entry
                for entry in self.OpenFAST_outfile_list):
            raise ValueError(
                'No existing output files left to process in "FastPost.OpenFAST_outfile_list". \n Quitting FAST_Processing.'
            )

        # # load case matrix data to get descriptive case naming
        # if self.fname_case_matrix == '':
        #     print('Warning! No case matrix file provided, no case descriptions will be provided.')
        #     self.case_desc = ['Case ID %d' % i for i in range(M)]
        # else:
        #     cases = load_case_matrix(self.fname_case_matrix)
        #     self.case_desc = get_dlc_label(cases, include_seed=True)

        # get unique file namebase for datasets
        self.namebase = []
        if len(self.dataset_names) > 0:
            # use filename safe version of dataset names
            # (at most N names, N being the number of entries initially given)
            self.namebase = [
                "".join(c for c in name
                        if c.isalpha() or c.isdigit() or c in ['_', '-']
                        ).rstrip()
                for _, name in zip(range(N), self.dataset_names)
            ]
        elif isinstance(self.OpenFAST_outfile_list[0], list):
            # use out file naming: first file of each dataset, without the
            # part after the last underscore
            self.namebase = [
                '_'.join(os.path.split(flist[0])[1].split('_')[:-1])
                for flist in self.OpenFAST_outfile_list
            ]
        else:
            self.namebase = [
                '_'.join(os.path.split(flist)[1].split('_')[:-1])
                for flist in self.OpenFAST_outfile_list
            ]

        # check that names are unique
        if len(self.namebase) != len(set(self.namebase)):
            self.namebase = []
        # as last resort, give generic name
        if not self.namebase:
            if isinstance(self.OpenFAST_outfile_list[0], str):
                # Just one dataset name for single dataset
                self.namebase = ['dataset1']
            else:
                self.namebase = [
                    'dataset' + ('{}'.format(i)).zfill(len(str(N - 1)))
                    for i in range(N)
                ]

        # Run design comparison if filenames list has multiple lists
        if (len(self.OpenFAST_outfile_list) > 1) and (isinstance(
                self.OpenFAST_outfile_list[0], list)):
            # Load stats and load rankings for design comparisons
            stats, load_rankings = self.design_comparison(
                self.OpenFAST_outfile_list)

        else:
            # Initialize Analysis (shared by the parallel and serial paths)
            loads_analysis = Analysis.Loads_Analysis()
            loads_analysis.verbose = self.verbose
            loads_analysis.t0 = self.t0
            loads_analysis.tf = self.tf
            # Forward the ranking preferences, as design_comparison does
            loads_analysis.ranking_vars = self.ranking_vars
            loads_analysis.ranking_stats = self.ranking_stats

            # run analysis in parallel
            if self.parallel_analysis:
                analyze = partial(loads_analysis.full_loads_analysis,
                                  get_load_ranking=False)
                # The context manager releases the workers even on failure
                with mp.Pool(self.parallel_cores) as pool:
                    try:
                        stats_separate = pool.map(analyze,
                                                  self.OpenFAST_outfile_list)
                    except Exception:
                        # Retry with the first entry of the list
                        # (presumably a single dataset wrapped in an outer
                        # list - confirm against callers)
                        stats_separate = pool.map(
                            analyze, self.OpenFAST_outfile_list[0])

                # Re-sort into the more "standard" dictionary/dataframe format we like
                stats = [
                    pdTools.dict2df(ss).unstack() for ss in stats_separate
                ]
                dft = pd.DataFrame(stats)
                dft = dft.reorder_levels([2, 0, 1], axis=1).sort_index(axis=1,
                                                                       level=0)
                stats = pdTools.df2dict(dft)

                # Get load rankings after stats are loaded
                load_rankings = loads_analysis.load_ranking(
                    stats, names=self.dataset_names, get_df=False)

            # run analysis in serial
            else:
                stats, load_rankings = loads_analysis.full_loads_analysis(
                    self.OpenFAST_outfile_list, get_load_ranking=True)

        def _save_results(results, suffix):
            # Write one yaml file per dataset; a dict is a single dataset
            if isinstance(results, dict):
                pairs = [(self.namebase[0], results)]
            else:
                pairs = zip(self.namebase, results)
            for namebase, data in pairs:
                fname = namebase + suffix
                if self.verbose:
                    print('Saving {}'.format(fname))
                save_yaml(self.results_dir, fname, data)

        if self.save_SummaryStats:
            _save_results(stats, '_stats.yaml')
        if self.save_LoadRanking:
            _save_results(load_rankings, '_LoadRanking.yaml')

        return stats, load_rankings