import os

import numpy as np
import pandas as pd

# Assumed module-level imports: the names windIO, sim and AppendDataFrames used
# below follow the wetb.prepost conventions; P_RUN, POST_DIR and sim_id are
# assumed to be module-level globals defined elsewhere in this script.
from wetb.prepost import Simulations as sim
from wetb.prepost import windIO
from wetb.prepost.simchunks import AppendDataFrames


def logcheck(fname, fsave=None, mode='w'):
    """Check the log file of a single HAWC2 simulation and save the results
    to a text file.
    """
    logf = windIO.LogFile()
    logf.readlog(fname)
    contents = logf._msglistlog2csv('')
    if fsave is None:
        fsave = fname.replace('.log', '.csv')
    with open(fsave, mode) as f:
        f.write(contents)
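
# A minimal usage sketch (not part of the original script): run logcheck() on
# every HAWC2 log file of a run directory. The 'logfiles/*/*.log' layout mirrors
# the glob patterns used in postpro_node_merge() below; the run_dir default is a
# hypothetical example path.
def logcheck_all(run_dir='hawc2_runs/demo'):
    """Write a .csv log analysis next to every .log file found under run_dir."""
    import glob
    for flog in glob.glob(os.path.join(run_dir, 'logfiles', '*', '*.log')):
        logcheck(flog)
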
def postpro_node_merge(tqdm=False, zipchunks=False, m=[3, 4, 6, 8, 9, 10, 12]):
    """With postpro_node each individual case has a .csv file for the log file
    analysis and a .csv file for the statistics tables. Merge all these single
    files into one table/DataFrame. When using the zipchunks approach, all log
    file analysis and statistics are grouped into tar archives in the
    prepost-data directory.

    Parameters
    ----------

    tqdm : boolean, default=False
        Set to True to display a progress bar (provided by the tqdm module)
        when merging all csv files into a single table/pd.DataFrame.

    zipchunks : boolean, default=False
        Set to True if merging post-processing files grouped into tar archives
        as generated by the zipchunks approach.

    """
    # -------------------------------------------------------------------------
    # MERGE POSTPRO ON NODE APPROACH INTO ONE DataFrame
    # -------------------------------------------------------------------------
    lf = windIO.LogFile()
    path_pattern = os.path.join(P_RUN, 'logfiles', '*', '*.csv')
    if zipchunks:
        path_pattern = os.path.join(POST_DIR, 'loganalysis_chnk*.tar.xz')
    csv_fname = '%s_ErrorLogs.csv' % sim_id
    fcsv = os.path.join(POST_DIR, csv_fname)
    mdf = AppendDataFrames(tqdm=tqdm)
    # the individual log file analyses do not have a header, make sure to
    # include one line for the header in the joined file
    mdf.txt2txt(fcsv, path_pattern, tarmode='r:xz', header=None,
                header_fjoined=lf._header(), recursive=True)
    # FIXME: this is due to a bug in the log file analysis. What is going on here??
    # fix that some cases do not have enough columns
    with open(fcsv.replace('.csv', '2.csv'), 'w') as f1:
        with open(fcsv) as f2:
            for line in f2.readlines():
                if len(line.split(';')) == 96:
                    line = line.replace(';0.00000000000;nan;-0.0000;',
                                        '0.00000000000;nan;-0.0000;')
                f1.write(line)
    # convert from CSV to DataFrame
    df = lf.csv2df(fcsv.replace('.csv', '2.csv'))
    df.to_hdf(fcsv.replace('.csv', '.h5'), 'table')

    # -------------------------------------------------------------------------
    path_pattern = os.path.join(P_RUN, 'res', '*', '*.csv')
    csv_fname = '%s_statistics.csv' % sim_id
    if zipchunks:
        path_pattern = os.path.join(POST_DIR, 'statsdel_chnk*.tar.xz')
    fcsv = os.path.join(POST_DIR, csv_fname)
    mdf = AppendDataFrames(tqdm=tqdm)
    # the individual statistics files do have a header line (header=0); add the
    # file name of each case in an extra [case_id] column
    mdf.txt2txt(fcsv, path_pattern, tarmode='r:xz', header=0, sep=',',
                header_fjoined=None, recursive=True, fname_col='[case_id]')
    # and convert to a DataFrame: takes about 2 minutes
    fdf = fcsv.replace('.csv', '.h5')
    store = pd.HDFStore(fdf, mode='w', format='table', complevel=9,
                        complib='zlib')
    colnames = ['channel', 'max', 'min', 'mean', 'std', 'range', 'absmax',
                'rms', 'int', 'intabs']
    colnames.extend(['m=%1.0f' % k for k in m])
    colnames.extend(['[case_id]'])
    dtypes = {col: np.float64 for col in colnames}
    dtypes['channel'] = str
    dtypes['[case_id]'] = str
    # when using min_itemsize the column names should be valid variable names
    # mitemsize = {'channel':60, '[case_id]':60}
    mdf.csv2df_chunks(store, fcsv, chunksize=1000000, min_itemsize={}, sep=',',
                      colnames=colnames, dtypes=dtypes, header=0)
    store.close()

    # -------------------------------------------------------------------------
    # merge missing cols onto stats
    # FIXME: HAS TO BE THE SAME AS tags IN post_launch
    required = ['[DLC]', '[run_dir]', '[wdir]', '[Windspeed]', '[res_dir]',
                '[case_id]', '[Case folder]']
    df = pd.read_hdf(fdf, 'table')
    # FIXME: why do some cases have a leading ./ (but most do not)?
    sel = df['[case_id]'].str.startswith('./')
    df.loc[sel, '[case_id]'] = df.loc[sel, '[case_id]'].str.replace('./', '', 1)
    # df now has case_id as the path to the statistics file: res/dlc12_xxx/yyy
    # while df_tags will have just yyy as case_id
    tmp = df['[case_id]'].str.split('/', expand=True)
    df['[case_id]'] = tmp[tmp.columns[-1]]
    cc = sim.Cases(POST_DIR, sim_id)
    df_tags = cc.cases2df()
    df_stats = pd.merge(df, df_tags[required], on=['[case_id]'])

    # if the merge didn't work due to other misaligned case_id tags, do not
    # overwrite our otherwise ok tables!
    if len(df_stats) != len(df):
        print('failed to merge required tags, something is wrong!')
        # find out which cases we lost and why
        print('number of entries lost:', len(df) - len(df_stats))
        s_df = set(df['[case_id]'].unique())
        s_stats = set(df_stats['[case_id]'].unique())
        print('nr of channels:', len(df['channel'].unique()))
        msg = 'nr of case_ids lost:'
        print(msg, (len(df) - len(df_stats)) / len(df['channel'].unique()))
        print('following case_ids have mysteriously disappeared:')
        print(s_df - s_stats)
        return

    df_stats.to_hdf(fdf, 'table', mode='w')
    df_stats.to_csv(fdf.replace('.h5', '.csv'))

    # -------------------------------------------------------------------------
    # save channel list
    chans = df_stats['channel'].unique()
    chans.sort()
    fname = os.path.join(POST_DIR, '%s_unique-channel-names.csv' % sim_id)
    pd.DataFrame(chans).to_csv(fname)
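
# A minimal usage sketch (not part of the original script): merge all per-case
# post-processing files produced with the zipchunks approach and read the merged
# statistics table back for inspection. POST_DIR and sim_id are the same
# module-level globals used by postpro_node_merge() above.
def example_postpro_merge():
    postpro_node_merge(tqdm=True, zipchunks=True)
    # the merged statistics table is written to POST_DIR/<sim_id>_statistics.h5
    fdf = os.path.join(POST_DIR, '%s_statistics.h5' % sim_id)
    df_stats = pd.read_hdf(fdf, 'table')
    print(df_stats['channel'].unique())
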
def readlog(self, fname):
    log = windIO.LogFile()
    log.readlog(os.path.join(self.logpath, fname))
    return log
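
# A minimal usage sketch (not part of the original script): readlog() above
# expects a self.logpath attribute, so it presumably lives on a helper class.
# The checker argument stands for such an instance and the file name is
# hypothetical; _msglistlog2csv('') is the same LogFile method used by
# logcheck() at the top of this file.
def example_readlog_to_csv(checker, fname='dlc12_demo.log'):
    log = checker.readlog(fname)
    with open(fname.replace('.log', '.csv'), 'w') as f:
        f.write(log._msglistlog2csv(''))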