Example #1
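# Assumed context (not shown on this page): windIO is the log/result file
# module from the DTU wetb (Wind Energy Toolbox) prepost package, e.g.
# from wetb.prepost import windIO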
def logcheck(fname, fsave=None, mode='w'):
    """Check the log file of a single HAWC2 simulation and save results to
    textfile.
    """

    logf = windIO.LogFile()
    logf.readlog(fname)
    contents = logf._msglistlog2csv('')
    if fsave is None:
        fsave = fname.replace('.log', '.csv')
    with open(fsave, mode) as f:
        f.write(contents)
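
A minimal usage sketch for logcheck (the log file name is hypothetical and
assumes a finished HAWC2 simulation):

logcheck('logfiles/dlc12_case01.log')            # writes logfiles/dlc12_case01.csv
logcheck('logfiles/dlc12_case01.log', mode='a')  # append instead of overwrite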
Example #2
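# Assumed context (not shown on this page): a wetb.prepost based
# post-processing script in which os, numpy (np), pandas (pd), windIO,
# AppendDataFrames, sim (the Simulations module) and the module-level
# globals P_RUN, POST_DIR and sim_id are already defined.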
def postpro_node_merge(tqdm=False, zipchunks=False, m=[3, 4, 6, 8, 9, 10, 12]):
    """With postpro_node each individual case has a .csv file for the log file
    analysis and a .csv file for the statistics tables. Merge all these single
    files into one table/DataFrame.

    When using the zipchunk approach, all log file analysis and statistics
    are grouped into tar archives in the prepost-data directory.

    Parameters
    ----------

    tqdm : boolean, default=False
        Set to True for displaying a progress bar (provided by the tqdm module)
        when merging all csv files into a single table/pd.DataFrame.

    zipchunks : boolean, default=False
        Set to True if merging post-processing files grouped into tar archives
        as generated by the zipchunks approach.

    """
    # -------------------------------------------------------------------------
    # MERGE POSTPRO ON NODE APPROACH INTO ONE DataFrame
    # -------------------------------------------------------------------------
    lf = windIO.LogFile()
    path_pattern = os.path.join(P_RUN, 'logfiles', '*', '*.csv')
    if zipchunks:
        path_pattern = os.path.join(POST_DIR, 'loganalysis_chnk*.tar.xz')
    csv_fname = '%s_ErrorLogs.csv' % sim_id
    fcsv = os.path.join(POST_DIR, csv_fname)
    mdf = AppendDataFrames(tqdm=tqdm)
    # the individual log file analysis csv files do not have a header, so
    # make sure to include a header line in the joined file
    mdf.txt2txt(fcsv,
                path_pattern,
                tarmode='r:xz',
                header=None,
                header_fjoined=lf._header(),
                recursive=True)

    # FIXME: this is due to a bug in the log file analysis. What is going on
    # here?? Work around cases that have one column too many (a spurious
    # extra separator).
    with open(fcsv.replace('.csv', '2.csv'), 'w') as f1:
        with open(fcsv) as f2:
            for line in f2:
                if len(line.split(';')) == 96:
                    line = line.replace(';0.00000000000;nan;-0.0000;',
                                        '0.00000000000;nan;-0.0000;')
                f1.write(line)

    # convert from CSV to DataFrame
    df = lf.csv2df(fcsv.replace('.csv', '2.csv'))
    df.to_hdf(fcsv.replace('.csv', '.h5'), 'table')
    # -------------------------------------------------------------------------
    path_pattern = os.path.join(P_RUN, 'res', '*', '*.csv')
    csv_fname = '%s_statistics.csv' % sim_id
    if zipchunks:
        path_pattern = os.path.join(POST_DIR, 'statsdel_chnk*.tar.xz')
    fcsv = os.path.join(POST_DIR, csv_fname)
    mdf = AppendDataFrames(tqdm=tqdm)
    # unlike the log file analysis, the individual statistics csv files do
    # have a header line (header=0), which is re-used for the joined file
    mdf.txt2txt(fcsv,
                path_pattern,
                tarmode='r:xz',
                header=0,
                sep=',',
                header_fjoined=None,
                recursive=True,
                fname_col='[case_id]')
    # and convert to a DataFrame: takes about 2 minutes
    fdf = fcsv.replace('.csv', '.h5')
    store = pd.HDFStore(fdf,
                        mode='w',
                        format='table',
                        complevel=9,
                        complib='zlib')
    colnames = [
        'channel', 'max', 'min', 'mean', 'std', 'range', 'absmax', 'rms',
        'int', 'intabs'
    ]
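    # one column per Woehler exponent m ('m=3', 'm=4', ...), matching the
    # equivalent load columns in the individual statistics files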
    colnames.extend(['m=%1.0f' % k for k in m])
    colnames.extend(['[case_id]'])
    dtypes = {col: np.float64 for col in colnames}
    dtypes['channel'] = str
    dtypes['[case_id]'] = str
    # when using min_itemsize the column names should be valid variable names
    # mitemsize = {'channel':60, '[case_id]':60}
    mdf.csv2df_chunks(store,
                      fcsv,
                      chunksize=1000000,
                      min_itemsize={},
                      sep=',',
                      colnames=colnames,
                      dtypes=dtypes,
                      header=0)
    store.close()
    # -------------------------------------------------------------------------
    # merge missing cols onto stats
    # FIXME: HAS TO BE THE SAME AS tags IN post_launch
    required = [
        '[DLC]', '[run_dir]', '[wdir]', '[Windspeed]', '[res_dir]',
        '[case_id]', '[Case folder]'
    ]
    df = pd.read_hdf(fdf, 'table')

    # FIXME: why do some cases have a leading ./ (but most do not)?
    sel = df['[case_id]'].str.startswith('./')
    df.loc[sel, '[case_id]'] = df.loc[sel, '[case_id]'].str.replace(
        './', '', n=1, regex=False)
    # df now has case_id as the path to the statistics file: res/dlc12_xxx/yyy
    # while df_tags will have just yyy as case_id
    tmp = df['[case_id]'].str.split('/', expand=True)
    df['[case_id]'] = tmp[tmp.columns[-1]]

    cc = sim.Cases(POST_DIR, sim_id)
    df_tags = cc.cases2df()
    df_stats = pd.merge(df, df_tags[required], on=['[case_id]'])
    # if the merge didn't work due to other misaligned case_id tags, do not
    # overwrite our otherwise ok tables!
    if len(df_stats) != len(df):
        print('failed to merge required tags, something is wrong!')
        # find out which cases we lost and why
        print('number of entries lost:', len(df) - len(df_stats))
        s_df = set(df['[case_id]'].unique())
        s_stats = set(df_stats['[case_id]'].unique())
        print('nr of channels:', len(df['channel'].unique()))
        msg = 'nr of case_ids lost:'
        print(msg, (len(df) - len(df_stats)) / len(df['channel'].unique()))
        print('following case_ids have mysteriously disappeared:')
        print(s_df - s_stats)
        return
    df_stats.to_hdf(fdf, 'table', mode='w')
    df_stats.to_csv(fdf.replace('.h5', '.csv'))

    # -------------------------------------------------------------------------
    # save channel list
    chans = df_stats['channel'].unique()
    chans.sort()
    fname = os.path.join(POST_DIR, '%s_unique-channel-names.csv' % sim_id)
    pd.DataFrame(chans).to_csv(fname)
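
A hedged usage sketch: postpro_node_merge would be called once all individual
cases have finished post-processing on the nodes; the zipchunks flag has to
match how the jobs were generated:

postpro_node_merge(tqdm=True, zipchunks=False)
# or, when the per-case results were bundled into tar archives:
postpro_node_merge(tqdm=True, zipchunks=True)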
Example #3
def readlog(self, fname):
    log = windIO.LogFile()
    log.readlog(os.path.join(self.logpath, fname))
    return log
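
A minimal sketch of a class this method could belong to; the class name, the
logpath attribute and the paths are hypothetical, inferred from the method
body:

import os
from wetb.prepost import windIO  # assumption: windIO from the wetb package

class LogCollection:
    """Hypothetical wrapper around a directory of HAWC2 log files."""

    def __init__(self, logpath):
        # directory that holds the HAWC2 .log files
        self.logpath = logpath

    def readlog(self, fname):
        log = windIO.LogFile()
        log.readlog(os.path.join(self.logpath, fname))
        return log

logs = LogCollection('logfiles/dlc12')   # hypothetical directory
log = logs.readlog('case01.log')         # hypothetical file name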