def plot_dlc00(sim_ids, post_dirs, run_dirs, fig_dir_base=None, labels=None,
               cnames=['dlc00_stair_wsp04_25_noturb.htc',
                       'dlc00_ramp_wsp04_25_04_noturb.htc'],
               figsize=(14, 11)):
    """
    This version is an update over plot_staircase.
    """
    stairs = []
    # if sim_ids is a list, combine the results of all sim_ids into one plot
    if isinstance(sim_ids, list):
        for ii, sim_id in enumerate(sim_ids):
            if isinstance(post_dirs, list):
                post_dir = post_dirs[ii]
            else:
                post_dir = post_dirs
            stairs.append(sim.Cases(post_dir, sim_id, rem_failed=True))
    else:
        post_dir = post_dirs
        stairs.append(sim.Cases(post_dir, sim_ids, rem_failed=True))

    for cname in cnames:
        fp = PlotPerf(figsize=figsize)
        for i, cc in enumerate(stairs):
            if isinstance(cname, list):
                _cname = cname[i]
            else:
                _cname = cname
            if _cname in cc.cases_fail:
                print('no result for %s' % cc.sim_id)
                continue
            cc.change_results_dir(run_dirs[i])
            try:
                res = cc.load_result_file(cc.cases[_cname])
            except KeyError:
                # print all available case names to help debugging
                for k in sorted(cc.cases.keys()):
                    print(k)
                print('-' * 79)
                print(cc.sim_id, _cname)
                print('-' * 79)
                raise
            if labels is not None:
                label = labels[i]
            else:
                label = cc.sim_id
            fp.plot(res, label)
        dlcf = 'dlc' + cc.cases[_cname]['[DLC]']
        fig_path = os.path.join(fig_dir_base, dlcf)
        fp.final(fig_path, _cname.replace('.htc', '.png'))
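
# A minimal usage sketch for plot_dlc00 (not part of the original module):
# the sim_ids, directories and labels below are hypothetical placeholders;
# the module-level imports (os, sim, PlotPerf) are assumed, as elsewhere in
# this file.
#
# plot_dlc00(['A0001', 'A0002'],
#            post_dirs=['prepost-data/A0001/', 'prepost-data/A0002/'],
#            run_dirs=['/scratch/runs/A0001/', '/scratch/runs/A0002/'],
#            fig_dir_base='figures', labels=['baseline', 'variant'])
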
def prepare_failed(compress=False, wine_arch='win32', wine_prefix='~/.wine32',
                   prelude='', zipchunks=False):
    """Find failed cases and create the pbs_in_failed directory so they can
    be re-launched. Uses the module-level POST_DIR, sim_id and opt.
    """
    cc = sim.Cases(POST_DIR, sim_id)
    df_tags = cc.cases2df()

    # -------------------------------------------------------------------------
    # find failed cases and create pbs_in_failed dir
    cc.find_failed(df_cases=df_tags)
    sim.copy_pbs_in_failedcases(cc.cases_fail, path=opt.pbs_failed_path)

    if zipchunks:
        # and for the zipchunks approach as well
        sorts_on = ['[DLC]', '[Windspeed]']
        create_chunks_htc_pbs(cc.cases_fail, sort_by_values=sorts_on, ppn=20,
                              nr_procs_series=3, walltime='20:00:00',
                              chunks_dir='zip-chunks-jess-fail',
                              compress=compress, wine_arch=wine_arch,
                              wine_prefix=wine_prefix, prelude=prelude,
                              queue='windq', i0=1000)
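
# Usage sketch for prepare_failed, assuming the module-level POST_DIR, sim_id
# and opt (parsed command-line options) are set as elsewhere in this script;
# the pbs_failed_path value is a hypothetical example.
#
# opt.pbs_failed_path = 'pbs_in_failed'
# prepare_failed(zipchunks=True, compress=False, wine_arch='win32',
#                wine_prefix='~/.wine32')
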
def post_processing(self, statistics=True, resdir=None, complib='blosc',
                    calc_mech_power=False):
    """
    Parameters
    ----------

    resdir : str, default=None
        Defaults to reading the results from the [run_dir] tag.
        Force to any other directory using this variable. You can also use
        the presets as defined for runmethod in _set_path_config.
    """
    post_dir = self.POST_DIR

    # =========================================================================
    # check logfiles, results files, pbs output files
    # logfile analysis is written to a csv file in logfiles directory
    # =========================================================================
    # load the file saved in post_dir
    cc = sim.Cases(post_dir, self.sim_id, rem_failed=False, complib=complib)
    if isinstance(resdir, str):
        forcedir = os.path.join(resdir, self.PROJECT, self.sim_id)
        cc.change_results_dir(forcedir)

    cc.post_launch()
    cc.remove_failed()

    if statistics:
        tags = ['[windspeed]']
        stats_df = cc.statistics(calc_mech_power=calc_mech_power,
                                 ch_fatigue=[], tags=tags, update=False)
        ftarget = os.path.join(self.POST_DIR, '%s_statistics.xlsx')
        stats_df.to_excel(ftarget % self.sim_id)
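
# Usage sketch for post_processing: it is a method, so it assumes a
# surrounding class that defines POST_DIR, PROJECT and sim_id. The class and
# instance names below are hypothetical.
#
# sims = MySimulations()  # hypothetical class providing POST_DIR, PROJECT, sim_id
# sims.post_processing(statistics=True, resdir='/mnt/archive/results',
#                      calc_mech_power=True)
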
def postpro_node_merge(tqdm=False, zipchunks=False,
                       m=[3, 4, 6, 8, 9, 10, 12]):
    """With postpro_node each individual case has one .csv file for the log
    file analysis and one .csv file for the statistics tables. Merge all
    these single files into one table/DataFrame.

    When using the zipchunks approach, all log file analysis and statistics
    are grouped into tar archives in the prepost-data directory.

    Parameters
    ----------

    tqdm : boolean, default=False
        Set to True to display a progress bar (provided by the tqdm module)
        while merging all csv files into a single table/pd.DataFrame.

    zipchunks : boolean, default=False
        Set to True if merging post-processing files grouped into tar
        archives as generated by the zipchunks approach.

    """
    # -------------------------------------------------------------------------
    # MERGE POSTPRO ON NODE APPROACH INTO ONE DataFrame
    # -------------------------------------------------------------------------
    lf = windIO.LogFile()
    path_pattern = os.path.join(P_RUN, 'logfiles', '*', '*.csv')
    if zipchunks:
        path_pattern = os.path.join(POST_DIR, 'loganalysis_chnk*.tar.xz')
    csv_fname = '%s_ErrorLogs.csv' % sim_id
    fcsv = os.path.join(POST_DIR, csv_fname)
    mdf = AppendDataFrames(tqdm=tqdm)
    # the individual log file analysis does not have a header, make sure to
    # include a line for the header
    mdf.txt2txt(fcsv, path_pattern, tarmode='r:xz', header=None,
                header_fjoined=lf._header(), recursive=True)
    # FIXME: this is due to a bug in the log file analysis. What is going on?
    # fix that some cases do not have enough columns
    with open(fcsv.replace('.csv', '2.csv'), 'w') as f1:
        with open(fcsv) as f2:
            for line in f2.readlines():
                if len(line.split(';')) == 96:
                    line = line.replace(';0.00000000000;nan;-0.0000;',
                                        '0.00000000000;nan;-0.0000;')
                f1.write(line)
    # convert from CSV to DataFrame
    df = lf.csv2df(fcsv.replace('.csv', '2.csv'))
    df.to_hdf(fcsv.replace('.csv', '.h5'), 'table')

    # -------------------------------------------------------------------------
    path_pattern = os.path.join(P_RUN, 'res', '*', '*.csv')
    csv_fname = '%s_statistics.csv' % sim_id
    if zipchunks:
        path_pattern = os.path.join(POST_DIR, 'statsdel_chnk*.tar.xz')
    fcsv = os.path.join(POST_DIR, csv_fname)
    mdf = AppendDataFrames(tqdm=tqdm)
    # the individual statistics files do have a header; keep only the first
    # one and add the case_id based on the file name
    mdf.txt2txt(fcsv, path_pattern, tarmode='r:xz', header=0, sep=',',
                header_fjoined=None, recursive=True, fname_col='[case_id]')
    # and convert to df: takes 2 minutes
    fdf = fcsv.replace('.csv', '.h5')
    store = pd.HDFStore(fdf, mode='w', format='table', complevel=9,
                        complib='zlib')
    colnames = ['channel', 'max', 'min', 'mean', 'std', 'range', 'absmax',
                'rms', 'int', 'intabs']
    colnames.extend(['m=%1.0f' % k for k in m])
    colnames.extend(['[case_id]'])
    dtypes = {col: np.float64 for col in colnames}
    dtypes['channel'] = str
    dtypes['[case_id]'] = str
    # when using min_itemsize the column names should be valid variable names
    # mitemsize = {'channel':60, '[case_id]':60}
    mdf.csv2df_chunks(store, fcsv, chunksize=1000000, min_itemsize={},
                      sep=',', colnames=colnames, dtypes=dtypes, header=0)
    store.close()

    # -------------------------------------------------------------------------
    # merge missing cols onto stats
    # FIXME: HAS TO BE THE SAME AS tags IN post_launch
    required = ['[DLC]', '[run_dir]', '[wdir]', '[Windspeed]', '[res_dir]',
                '[case_id]', '[Case folder]']
    df = pd.read_hdf(fdf, 'table')
    # FIXME: why do some cases have a leading ./ (but most do not)?
    sel = df['[case_id]'].str.startswith('./')
    df.loc[sel, '[case_id]'] = df.loc[sel, '[case_id]'].str.replace('./', '', 1)
    # df now has case_id as the path to the statistics file: res/dlc12_xxx/yyy
    # while df_tags will have just yyy as case_id
    tmp = df['[case_id]'].str.split('/', expand=True)
    df['[case_id]'] = tmp[tmp.columns[-1]]
    cc = sim.Cases(POST_DIR, sim_id)
    df_tags = cc.cases2df()
    df_stats = pd.merge(df, df_tags[required], on=['[case_id]'])

    # if the merge did not work due to other misaligned case_id tags, do not
    # overwrite our otherwise ok tables!
    if len(df_stats) != len(df):
        print('failed to merge required tags, something is wrong!')
        # find out which cases we lost and why
        print('number of entries lost:', len(df) - len(df_stats))
        s_df = set(df['[case_id]'].unique())
        s_stats = set(df_stats['[case_id]'].unique())
        print('nr of channels:', len(df['channel'].unique()))
        msg = 'nr of case_ids lost:'
        print(msg, (len(df) - len(df_stats)) / len(df['channel'].unique()))
        print('following case_ids have mysteriously disappeared:')
        print(s_df - s_stats)
        return
    df_stats.to_hdf(fdf, 'table', mode='w')
    df_stats.to_csv(fdf.replace('.h5', '.csv'))

    # -------------------------------------------------------------------------
    # save channel list
    chans = df_stats['channel'].unique()
    chans.sort()
    fname = os.path.join(POST_DIR, '%s_unique-channel-names.csv' % sim_id)
    pd.DataFrame(chans).to_csv(fname)
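
# Usage sketch for postpro_node_merge: the function writes its merged tables
# to disc rather than returning them, so reload from the h5 file afterwards.
# Assumes the same module-level POST_DIR and sim_id as above.
#
# postpro_node_merge(tqdm=True, zipchunks=True)
# fdf = os.path.join(POST_DIR, '%s_statistics.h5' % sim_id)
# df_stats = pd.read_hdf(fdf, 'table')
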
def post_launch(sim_id, statistics=True, rem_failed=True, check_logs=True,
                force_dir=False, update=False, saveinterval=2000, csv=False,
                m=[3, 4, 6, 8, 9, 10, 12], neq=1e7, no_bins=46, int_env=False,
                years=20.0, fatigue=True, A=None, AEP=False, nx=300,
                save_new_sigs=False, envelopeturbine=False,
                envelopeblade=False, save_iter=False, pbs_failed_path=False):

    # =========================================================================
    # check logfiles, results files, pbs output files
    # logfile analysis is written to a csv file in logfiles directory
    # =========================================================================
    # load the file saved in post_dir
    config = {}
    config['Weibull'] = {}
    config['Weibull']['Vr'] = 11.
    config['Weibull']['Vref'] = 50
    config['nn_shaft'] = 4
    cc = sim.Cases(POST_DIR, sim_id, rem_failed=rem_failed, config=config)

    if force_dir:
        for case in cc.cases:
            cc.cases[case]['[post_dir]'] = POST_DIR
            cc.cases[case]['[run_dir]'] = force_dir

    if check_logs:
        cc.post_launch(save_iter=save_iter, pbs_failed_path=pbs_failed_path)
    elif rem_failed:
        cc.remove_failed()

    # using a suffix is only relevant if we have more cases than the save
    # interval
    if len(cc.cases) > saveinterval:
        suffix = True
    else:
        suffix = False

    df_stats, df_AEP, df_Leq = None, None, None

    if statistics:
        i0, i1 = 0, -1

        # example for a combination of signals
        # name = 'stress1'
        # expr = '[p1-p1-node-002-forcevec-z]*3 + [p1-p1-node-002-forcevec-y]'
        # add_sigs = {name:expr}

        # in addition, sim_id and case_id are always added by default
        # FIXME: HAS TO BE THE SAME AS required IN postpro_node_merge
        tags = ['[Case folder]', '[run_dir]', '[res_dir]', '[DLC]', '[wsp]',
                '[Windspeed]', '[wdir]']
        add = None
        # general statistics for all channels
        # set neq=None here to calculate 1Hz equivalent loads
        df_stats = cc.statistics(calc_mech_power=False, i0=i0, i1=i1,
                                 tags=tags, add_sensor=add, ch_fatigue=None,
                                 update=update, saveinterval=saveinterval,
                                 suffix=suffix, save_new_sigs=save_new_sigs,
                                 csv=csv, m=m, neq=None, no_bins=no_bins,
                                 chs_resultant=[], A=A, add_sigs={})
        # save channel list
        chans = df_stats['channel'].unique()
        chans.sort()
        fname = os.path.join(POST_DIR, '%s_unique-channel-names.csv' % sim_id)
        pd.DataFrame(chans).to_csv(fname)

    # annual energy production
    if AEP:
        # load the statistics in case they are missing
        if not statistics:
            df_stats, Leq_df, AEP_df = cc.load_stats()
        # CAUTION: depending on the output type, electrical power can be two
        # different things with the DTU Wind Energy Controller. Either
        # manually set ch_powe to the correct value or use this simple
        # mechanism to figure out which of the two expected values it is.
        if 'DLL-2-inpvec-2' in df_stats['channel'].unique():
            ch_powe = 'DLL-2-inpvec-2'
        elif 'DLL-dtu_we_controller-inpvec-2' in df_stats['channel'].unique():
            ch_powe = 'DLL-dtu_we_controller-inpvec-2'
        else:
            # guard against ch_powe being referenced while undefined
            raise ValueError('Could not identify the electrical power channel.')
        df_AEP = cc.AEP(df_stats, csv=csv, update=update, save=True,
                        ch_powe=ch_powe)

    if envelopeblade:
        ch_list = []
        for iblade in range(1, 4):
            for i in range(1, 18):
                rpl = (iblade, iblade, i)
                ch_list.append(['blade%i-blade%i-node-%3.3i-momentvec-x' % rpl,
                                'blade%i-blade%i-node-%3.3i-momentvec-y' % rpl,
                                'blade%i-blade%i-node-%3.3i-momentvec-z' % rpl,
                                'blade%i-blade%i-node-%3.3i-forcevec-x' % rpl,
                                'blade%i-blade%i-node-%3.3i-forcevec-y' % rpl,
                                'blade%i-blade%i-node-%3.3i-forcevec-z' % rpl])
        cc.envelopes(ch_list=ch_list, append='_blade', int_env=int_env, Nx=nx)

    if envelopeturbine:
        ch_list = [['tower-tower-node-001-momentvec-x',
                    'tower-tower-node-001-momentvec-y',
                    'tower-tower-node-001-momentvec-z'],
                   ['tower-tower-node-022-momentvec-x',
                    'tower-tower-node-022-momentvec-y',
                    'tower-tower-node-022-momentvec-z',
                    'tower-tower-node-022-forcevec-x',
                    'tower-tower-node-022-forcevec-y',
                    'tower-tower-node-022-forcevec-z'],
                   ['hub1-hub1-node-001-momentvec-x',
                    'hub1-hub1-node-001-momentvec-y',
                    'hub1-hub1-node-001-momentvec-z']]
        cc.envelopes(ch_list=ch_list, append='_turbine', int_env=int_env,
                     Nx=nx)

    if fatigue:
        # load the statistics in case they are missing
        if not statistics:
            df_stats, Leq_df, AEP_df = cc.load_stats()
        # life time equivalent load for all channels
        df_Leq = cc.fatigue_lifetime(df_stats, neq, csv=csv, update=update,
                                     years=years, save=True)

    return df_stats, df_AEP, df_Leq
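
# Typical post_launch call, as a sketch (the sim_id is hypothetical): check
# the log files, then compute statistics and lifetime equivalent loads for a
# 20 year design life with 1e7 equivalent cycles.
#
# df_stats, df_AEP, df_Leq = post_launch('A0001', check_logs=True,
#                                        statistics=True, fatigue=True,
#                                        neq=1e7, years=20.0, AEP=False)
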
def launch_dlcs_excel(sim_id, silent=False, verbose=False, pbs_turb=False,
                      runmethod=None, write_htc=True, zipchunks=False,
                      walltime='04:00:00', postpro_node=False, compress=False,
                      dlcs_dir='htc/DLCs', postpro_node_zipchunks=True,
                      wine_arch='win32', wine_prefix='~/.wine32',
                      m=[3, 4, 6, 8, 9, 10, 12], prelude='', linux=False):
    """
    Launch load cases defined in Excel files
    """

    iter_dict = dict()
    iter_dict['[empty]'] = [False]

    if postpro_node or postpro_node_zipchunks:
        # pyenv = 'py36-wetb'
        pyenv = 'wetb_py3'
    else:
        pyenv = None

    # FIXME: THIS IS VERY MESSY, we have wine_prefix/arch and exesingle/chunks
    if linux:
        wine_arch = None
        wine_prefix = None
        prelude = 'module load mpi/openmpi_1.6.5_intelv14.0.0\n'

    # if linux:
    #     pyenv = 'wetb_py3'
    #     pyenv_cmd = 'source /home/python/miniconda3/bin/activate'
    #     exesingle = "{hawc2_exe:} {fname_htc:}"
    #     exechunks = "({winenumactl:} {hawc2_exe:} {fname_htc:}) "
    #     exechunks += "2>&1 | tee {fname_pbs_out:}"
    # else:
    #     pyenv = ''
    #     pyenv_cmd = 'source /home/ozgo/bin/activate_hawc2cfd.sh'
    #     exesingle = "time {hawc2_exe:} {fname_htc:}"
    #     exechunks = "(time numactl --physcpubind=$CPU_NR {hawc2_exe:} {fname_htc:}) "
    #     exechunks += "2>&1 | tee {fname_pbs_out:}"

    # see if a htc/DLCs dir exists
    # load all DLC definitions and make some assumptions on tags that are
    # not defined
    if os.path.exists(dlcs_dir):
        opt_tags = dlcdefs.excel_stabcon(dlcs_dir, silent=silent,
                                         p_source=P_SOURCE)
    else:
        opt_tags = dlcdefs.excel_stabcon(os.path.join(P_SOURCE, 'htc'),
                                         silent=silent, p_source=P_SOURCE)

    if len(opt_tags) < 1:
        raise ValueError('Not a single case is defined. Make sure the DLC '
                         'spreadsheets are configured properly.')

    # add all the root files, except anything with *.zip
    f_ziproot = []
    for (dirpath, dirnames, fnames) in os.walk(P_SOURCE):
        # remove all zip files
        for i, fname in enumerate(fnames):
            if not fname.endswith('.zip'):
                f_ziproot.append(fname)
        # only consider the top level directory
        break
    # and add those files
    for opt in opt_tags:
        opt['[zip_root_files]'] = f_ziproot

    if runmethod is None:
        runmethod = RUNMETHOD

    master = master_tags(sim_id, runmethod=runmethod, silent=silent,
                         verbose=verbose)
    master.tags['[sim_id]'] = sim_id
    master.tags['[walltime]'] = walltime
    master.output_dirs.append('[Case folder]')
    master.output_dirs.append('[Case id.]')

    # TODO: copy master and DLC exchange files to p_root too!!

    # all tags set in master_tags will be overwritten by the values set in
    # variable_tag_func(), iter_dict and opt_tags: vartag_func() has
    # precedence over iter_dict, which has precedence over opt_tags.
    # dlcdefs.vartag_excel_stabcon adds support for creating hydro files
    vartag_func = dlcdefs.vartag_excel_stabcon
    cases = sim.prepare_launch(iter_dict, opt_tags, master, vartag_func,
                               write_htc=write_htc, runmethod=runmethod,
                               copyback_turb=True, update_cases=False, msg='',
                               ignore_non_unique=False, run_only_new=False,
                               pbs_fname_appendix=False, short_job_names=False,
                               silent=silent, verbose=verbose, pyenv=pyenv,
                               m=m, postpro_node=postpro_node, exechunks=None,
                               exesingle=None, prelude=prelude,
                               postpro_node_zipchunks=postpro_node_zipchunks,
                               wine_arch=wine_arch, wine_prefix=wine_prefix)

    if pbs_turb:
        # to avoid confusing HAWC2 simulations and Mann64 generator PBS files,
        # MannTurb64 places PBS launch scripts in a "pbs_in_turb" folder
        mann64 = sim.MannTurb64(silent=silent)
        mann64.walltime = '00:59:59'
        mann64.queue = 'workq'
        mann64.gen_pbs(cases)

    if zipchunks:
        # create chunks
        # sort so we have minimal copying of turb files from mimer to
        # node/scratch. Note that walltime here is for running all cases
        # assigned to the respective nodes. It is not walltime per case.
        sorts_on = ['[DLC]', '[Windspeed]']
        create_chunks_htc_pbs(cases, sort_by_values=sorts_on, queue='workq',
                              ppn=20, nr_procs_series=3, walltime='20:00:00',
                              chunks_dir='zip-chunks-jess', compress=compress,
                              wine_arch=wine_arch, wine_prefix=wine_prefix,
                              prelude=prelude)
        # create_chunks_htc_pbs(cases, sort_by_values=sorts_on, queue='workq',
        #                       ppn=12, nr_procs_series=3, walltime='20:00:00',
        #                       chunks_dir='zip-chunks-gorm', compress=compress,
        #                       wine_arch=wine_arch, wine_prefix=wine_prefix)

    df = sim.Cases(cases).cases2df()
    df.to_excel(os.path.join(POST_DIR, sim_id + '.xls'))
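
# Launching sketch (hypothetical sim_id; assumes the htc/DLCs spreadsheets
# and the module-level P_SOURCE, POST_DIR and RUNMETHOD are in place):
#
# launch_dlcs_excel('A0001', runmethod='pbs', zipchunks=True, pbs_turb=True,
#                   walltime='04:00:00')
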
def plot_staircase(sim_ids, post_dirs, run_dirs, fig_dir_base=None,
                   cname='dlc00_stair_wsp04_25_noturb.htc'):
    """
    Default stair and ramp names:

    dlc00_stair_wsp04_25_noturb
    dlc00_ramp_wsp04_25_04_noturb
    """
    stairs = []
    col = ['r', 'k']
    alf = [1.0, 0.7]

    # if sim_ids is a list, combine the results of all sim_ids into one plot
    if isinstance(sim_ids, list):
        for ii, sim_id in enumerate(sim_ids):
            if isinstance(post_dirs, list):
                post_dir = post_dirs[ii]
            else:
                post_dir = post_dirs
            stairs.append(sim.Cases(post_dir, sim_id, rem_failed=True))
    else:
        sim_id = sim_ids
        sim_ids = [sim_id]
        post_dir = post_dirs
        stairs.append(sim.Cases(post_dir, sim_id, rem_failed=True))

    fig, axes = mplutils.make_fig(nrows=3, ncols=1, figsize=(14, 10))
    ax = axes.ravel()

    for i, cc in enumerate(stairs):
        if cname in cc.cases_fail:
            print('no result for %s' % cc.sim_id)
            continue
        cc.change_results_dir(run_dirs[i])
        res = cc.load_result_file(cc.cases[cname])
        respath = cc.cases[cname]['[run_dir]']
        fname = os.path.join(respath, cname)
        df_respost = pd.read_hdf(fname + '_postres.h5', 'table')
        sim_id = cc.sim_id
        time = res.sig[:, 0]
        t0, t1 = time[0], time[-1]

        # find the wind speed channel
        for channame, chan in res.ch_dict.items():
            if channame.startswith('windspeed-global-Vy-0.00-0.00'):
                break
        wind = res.sig[:, chan['chi']]
        chi = res.ch_dict['bearing-pitch1-angle-deg']['chi']
        pitch = res.sig[:, chi]
        chi = res.ch_dict['bearing-shaft_rot-angle_speed-rpm']['chi']
        rpm = res.sig[:, chi]
        chi = res.ch_dict['tower-tower-node-001-momentvec-x']['chi']
        tx = res.sig[:, chi]
        chi = res.ch_dict['tower-tower-node-001-momentvec-y']['chi']
        ty = res.sig[:, chi]
        chi = res.ch_dict['DLL-2-inpvec-2']['chi']
        power = res.sig[:, chi]
        power_mech = df_respost['stats-shaft-power']

        ax[0].plot(time, wind, col[i] + '--', alpha=alf[i],
                   label='%s wind speed' % sim_id)
        ax[0].plot(time, pitch, col[i] + '-.', alpha=alf[i],
                   label='%s pitch' % sim_id)
        ax[0].plot(time, rpm, col[i] + '-', alpha=alf[i],
                   label='%s RPM' % sim_id)
        ax[1].plot(time, tx, col[i] + '--', alpha=alf[i],
                   label='%s Tower FA' % sim_id)
        ax[1].plot(time, ty, col[i] + '-', alpha=alf[i],
                   label='%s Tower SS' % sim_id)
        ax[2].plot(time, power / 1e6, col[i] + '-', alpha=alf[i],
                   label='%s El Power' % sim_id)
        ax[2].plot(time, power_mech / 1e3, col[i] + '-', alpha=alf[i],
                   label='%s Mech Power' % sim_id)

    ax[0].set_xlim([t0, t1])
    ax[0].grid()
    ax[0].legend(loc='best')
    ax[0].set_xticklabels([])

    ax[1].set_xlim([t0, t1])
    ax[1].grid()
    ax[1].legend(loc='best')
    ax[1].set_xticklabels([])

    ax[2].set_xlim([t0, t1])
    ax[2].grid()
    ax[2].legend(loc='best')
    ax[2].set_xlabel('time [s]')

    fig.tight_layout()
    fig.subplots_adjust(hspace=0.06)
    fig.subplots_adjust(top=0.92)

    if not os.path.exists(fig_dir_base):
        os.makedirs(fig_dir_base)
    fig_path = os.path.join(fig_dir_base, '-'.join(sim_ids) + '_stair.png')
    print('saving: %s ...' % fig_path, end='')
    fig.savefig(fig_path)
    print('done')
    fig.clear()
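
# plot_staircase usage sketch, mirroring plot_dlc00 above (the ids and paths
# are hypothetical placeholders):
#
# plot_staircase(['A0001', 'B0001'],
#                ['prepost-data/A0001/', 'prepost-data/B0001/'],
#                ['/scratch/runs/A0001/', '/scratch/runs/B0001/'],
#                fig_dir_base='figures')
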
def load_stats(self, sim_ids, post_dirs, post_dir_save=False):
    self.sim_ids = sim_ids
    self.post_dirs = post_dirs

    # reduce required memory, only use the following columns
    cols = ['[run_dir]', '[DLC]', 'channel', '[res_dir]', '[Windspeed]',
            'mean', 'max', 'min', 'std', '[wdir]']

    # if sim_ids is a list, combine the dataframes of all sim_ids into one
    df_stats = pd.DataFrame()
    if isinstance(sim_ids, list):
        for ii, sim_id in enumerate(sim_ids):
            if isinstance(post_dirs, list):
                post_dir = post_dirs[ii]
            else:
                post_dir = post_dirs
            cc = sim.Cases(post_dir, sim_id, rem_failed=True)
            df_stats, _, _ = cc.load_stats(columns=cols, leq=False)
            print('%s Cases loaded.' % sim_id)

            # if specified, save the merged sims elsewhere
            if post_dir_save:
                fpath = os.path.join(post_dir_save, '-'.join(sim_ids) + '.h5')
                try:
                    os.makedirs(post_dir_save)
                except OSError:
                    pass
            else:
                fpath = os.path.join(post_dir, '-'.join(sim_ids) + '.h5')
            if ii == 0:
                # save somewhere so we can append the next data frame on disc
                df_stats.to_hdf(fpath, 'table', mode='w', format='table',
                                complevel=9, complib='blosc')
                print('%s merged stats written to: %s' % (sim_id, fpath))
            else:
                # instead of doing a concat in memory, add to the hdf store
                df_stats.to_hdf(fpath, 'table', mode='r+', format='table',
                                complevel=9, complib='blosc', append=True)
                print('%s merging stats into: %s' % (sim_id, fpath))
            # we might run into memory issues
            del df_stats, _, cc
            gc.collect()
        # and load the reduced combined set
        print('loading merged stats: %s' % fpath)
        df_stats = pd.read_hdf(fpath, 'table')
    else:
        sim_id = sim_ids
        sim_ids = [sim_id]
        post_dir = post_dirs
        cc = sim.Cases(post_dir, sim_id, rem_failed=True)
        df_stats, _, _ = cc.load_stats(leq=False)

    return df_stats
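
# load_stats sketch: merge the statistics of two sim_ids into one DataFrame
# on disc before loading it back. It is a method, so the instance name obj
# below is hypothetical; the ids and paths are placeholders.
#
# df_stats = obj.load_stats(['A0001', 'B0001'],
#                           ['prepost-data/A0001/', 'prepost-data/B0001/'],
#                           post_dir_save='prepost-data/merged/')
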
def merge_sim_ids(sim_ids, post_dirs, post_dir_save=False, columns=None):
    """Load and merge the statistics of several sim_ids into one DataFrame.
    """
    cols_extra = ['[run_dir]', '[res_dir]', '[wdir]', '[DLC]', '[Case folder]']
    min_itemsize = {'channel': 100, '[run_dir]': 100, '[res_dir]': 100,
                    '[DLC]': 10, '[Case folder]': 100}

    # map the run_dir to the same order as the post_dirs, labels
    run_dirs = []

    # avoid saving merged cases if there is only one!
    if isinstance(sim_ids, list) and len(sim_ids) == 1:
        sim_ids = sim_ids[0]

    # if sim_ids is a list, combine the dataframes of all sim_ids into one
    df_stats = pd.DataFrame()
    if isinstance(sim_ids, list):
        for ii, sim_id in enumerate(sim_ids):
            if isinstance(post_dirs, list):
                post_dir = post_dirs[ii]
            else:
                post_dir = post_dirs
            cc = sim.Cases(post_dir, sim_id, rem_failed=True)
            df_stats, _, _ = cc.load_stats(leq=False)
            if columns is not None:
                df_stats = df_stats[columns]

            # stats has only a few columns identifying the different cases,
            # add some more for selecting them
            dfc = cc.cases2df()
            if '[wsp]' in dfc.columns:
                wsp = '[wsp]'
            else:
                wsp = '[Windspeed]'
            # columns we want to add from cc.cases (cases dict) to stats
            cols_cc = set(cols_extra + [wsp])
            # do not add a column twice, some might already be in df stats
            add_cols = list(cols_cc - set(df_stats.columns))
            add_cols.append('[case_id]')
            dfc = dfc[add_cols]
            df_stats = pd.merge(df_stats, dfc, on='[case_id]')
            # FIXME: this is very messy, we can end up with both [wsp] and
            # [Windspeed] columns
            if '[Windspeed]' in df_stats.columns and '[wsp]' in df_stats.columns:
                df_stats.drop('[wsp]', axis=1, inplace=True)
            if wsp != '[Windspeed]':
                df_stats.rename(columns={wsp: '[Windspeed]'}, inplace=True)

            # map the run_dir to the same order as the post_dirs, labels
            run_dirs.append(df_stats['[run_dir]'].unique()[0])

            print('%s Cases loaded.' % sim_id)

            # if specified, save the merged sims elsewhere
            if post_dir_save:
                fpath = os.path.join(post_dir_save, '-'.join(sim_ids) + '.h5')
                try:
                    os.makedirs(post_dir_save)
                except OSError:
                    pass
            else:
                fpath = os.path.join(post_dir, '-'.join(sim_ids) + '.h5')
            fmerged = fpath.replace('.h5', '_statistics.h5')
            if ii == 0:
                # save somewhere so we can append the next data frame on disc
                store = pd.HDFStore(fmerged, mode='w', complevel=9,
                                    complib='zlib')
                store.append('table', df_stats, min_itemsize=min_itemsize)
                print(store.get_storer('table').table.description)
                print('%s merged stats written to: %s' % (sim_id, fmerged))
            else:
                # instead of doing a concat in memory, add to the hdf store.
                # this will fail if there are longer string columns compared
                # to ii=0
                store.append('table', df_stats)
                print('%s merging stats into: %s' % (sim_id, fmerged))
            # we might run into memory issues
            del df_stats, _, cc
            gc.collect()
        store.close()
        # and load the reduced combined set
        print('loading merged stats: %s' % fmerged)
        df_stats = pd.read_hdf(fmerged, 'table')
    else:
        sim_id = sim_ids
        sim_ids = [sim_id]
        post_dir = post_dirs
        if isinstance(post_dirs, list):
            post_dir = post_dirs[0]
        cc = sim.Cases(post_dir, sim_id, rem_failed=True)
        df_stats, _, _ = cc.load_stats(columns=columns, leq=False)
        if columns is not None:
            df_stats = df_stats[columns]
        run_dirs = [df_stats['[run_dir]'].unique()[0]]

        # stats has only a few columns identifying the different cases,
        # add some more for selecting them
        dfc = cc.cases2df()
        if '[wsp]' in dfc.columns:
            wsp = '[wsp]'
        else:
            wsp = '[Windspeed]'
        # columns we want to add from cc.cases (cases dict) to stats
        cols_cc = set(cols_extra + [wsp])
        # do not add a column twice, some might already be in df stats
        add_cols = list(cols_cc - set(df_stats.columns))
        add_cols.append('[case_id]')
        dfc = dfc[add_cols]
        df_stats = pd.merge(df_stats, dfc, on='[case_id]')
        if '[Windspeed]' in df_stats.columns and '[wsp]' in df_stats.columns:
            df_stats.drop('[wsp]', axis=1, inplace=True)
        if wsp != '[Windspeed]':
            df_stats.rename(columns={wsp: '[Windspeed]'}, inplace=True)

    return run_dirs, df_stats
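
# merge_sim_ids usage sketch (hypothetical ids and paths); note that it
# returns both the run_dirs (in the same order as post_dirs) and the merged
# statistics table:
#
# run_dirs, df_stats = merge_sim_ids(['A0001', 'B0001'],
#                                    ['prepost-data/A0001/',
#                                     'prepost-data/B0001/'])
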
def test_leq_life(self):
    """Verify that prepost.Simulation.Cases.fatigue_lifetime() returns the
    expected lifetime equivalent load.
    """
    # ---------------------------------------------------------------------
    # very simple case
    cases = {'case1': {'[post_dir]': 'no-path', '[sim_id]': 'A0'},
             'case2': {'[post_dir]': 'no-path', '[sim_id]': 'A0'}}
    cc = sim.Cases(cases)

    fh_list = [('case1', 10/3600), ('case2', 20/3600)]

    dfs = pd.DataFrame({'m=1.0': [2, 3],
                        'channel': ['channel1', 'channel1'],
                        '[case_id]': ['case1', 'case2']})
    neq_life = 1.0
    df_Leq = cc.fatigue_lifetime(dfs, neq_life, fh_lst=fh_list, save=False,
                                 update=False, csv=False, xlsx=False,
                                 silent=False)
    np.testing.assert_allclose(df_Leq['m=1.0'].values, 2*10 + 3*20)
    self.assertTrue(df_Leq['channel'].values[0] == 'channel1')

    # ---------------------------------------------------------------------
    # slightly more complicated
    neq_life = 3.0
    df_Leq = cc.fatigue_lifetime(dfs, neq_life, fh_lst=fh_list, save=False,
                                 update=False, csv=False, xlsx=False,
                                 silent=False)
    np.testing.assert_allclose(df_Leq['m=1.0'].values,
                               (2*10 + 3*20)/neq_life)

    # ---------------------------------------------------------------------
    # a bit more complex, and also test the sorting of fh_lst and dfs
    cases = {'case1': {'[post_dir]': 'no-path', '[sim_id]': 'A0'},
             'case2': {'[post_dir]': 'no-path', '[sim_id]': 'A0'},
             'case3': {'[post_dir]': 'no-path', '[sim_id]': 'A0'},
             'case4': {'[post_dir]': 'no-path', '[sim_id]': 'A0'}}
    cc = sim.Cases(cases)
    fh_list = [('case3', 10/3600), ('case2', 20/3600), ('case1', 50/3600),
               ('case4', 40/3600)]
    dfs = pd.DataFrame({'m=3.0': [2, 3, 4, 5],
                        'channel': ['channel1']*4,
                        '[case_id]': ['case4', 'case2', 'case3', 'case1']})
    neq_life = 5.0
    df_Leq = cc.fatigue_lifetime(dfs, neq_life, fh_lst=fh_list, save=False,
                                 update=False, csv=False, xlsx=False,
                                 silent=False)
    expected = ((2*2*2*40 + 3*3*3*20 + 4*4*4*10 + 5*5*5*50)/5)**(1/3)
    np.testing.assert_allclose(df_Leq['m=3.0'].values, expected)

    # ---------------------------------------------------------------------
    # more cases, and with sorting
    base = {'[post_dir]': 'no-path', '[sim_id]': 'A0'}
    cases = {'case%i' % k: base for k in range(50)}
    cc = sim.Cases(cases)
    # reverse the order of how they appear in dfs and fh_lst
    fh_list = [('case%i' % k, k*10/3600) for k in range(49, -1, -1)]
    dfs = pd.DataFrame({'m=5.2': np.arange(1, 51, 1),
                        'channel': ['channel1']*50,
                        '[case_id]': ['case%i' % k for k in range(50)]})
    df_Leq = cc.fatigue_lifetime(dfs, neq_life, fh_lst=fh_list, save=False,
                                 update=False, csv=False, xlsx=False,
                                 silent=False)
    expected = np.sum(np.power(np.arange(1, 51, 1), 5.2)*np.arange(0, 50, 1)*10)
    expected = np.power(expected/neq_life, 1/5.2)
    np.testing.assert_allclose(df_Leq['m=5.2'].values, expected)
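
# The expectations in test_leq_life follow the lifetime equivalent load
# definition exercised by the test (a sketch of the reasoning, not extra
# test code): with t_i the duration of case i in seconds (the fh_lst entries
# are hours, so t_i = fh_i * 3600), L_i its 1Hz equivalent load, and Woehler
# exponent m,
#
#   Leq = (sum_i(t_i * L_i**m) / neq_life)**(1/m)
#
# e.g. the third block: ((2**3*40 + 3**3*20 + 4**3*10 + 5**3*50)/5)**(1/3).
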