def clean_final_df(self, df_in):
    """Mark fill values as NaN, then run the shared preparation steps.

    The frame returned by ``index_by_time(df_in)`` has every entry equal to
    -9999.0 overwritten with NaN in place, is passed through
    ``prepare_data`` and finally handed to ``define_sdtype``.

    Returns the frame produced by ``prepare_data``.
    """
    indexed = index_by_time(df_in)
    # -9999.0 is treated as the "no data" marker.
    is_fill_value = indexed == -9999.0
    indexed[is_fill_value] = np.nan
    prepared = prepare_data(indexed)
    # Called for its effect on `prepared`; its return value is not used.
    define_sdtype(prepared)
    return prepared
def open_and_accumulate(fname):
    """Combine the file `fname` with neighbouring hours up to calib blocks.

    Neighbouring hours are appended as *dirty* (uncleaned) data.  Cleaned
    data files must NOT be accumulated: the numbering of calib-blocks has to
    stay unique, and every cleaning operation restarts that numbering at 1.

    Starting from `fname`, earlier hours are prepended until one of them
    contains calibration data, then later hours are appended under the same
    stopping rule.  The concatenated result is passed through
    ``prepare_data`` and ``define_sdtype`` and returned.
    """
    frames = deque([L1ADataFile(fname).open()])

    # Walk backwards in time, starting from the center file name.
    cursor = FileName(fname)
    calib_found = False
    while not calib_found:
        cursor.set_previous_hour()
        hour_file = L1ADataFile(cursor.fname)
        print("Appending {0} on the left.".format(cursor.timestr))
        frames.appendleft(hour_file.open_dirty())
        # The calib check uses `open()`, not the dirty frame just stored.
        calib_found = any(hour_file.open().is_calib)

    # Reset to the center file name and walk forwards in time.
    cursor = FileName(fname)
    calib_found = False
    while not calib_found:
        cursor.set_next_hour()
        hour_file = L1ADataFile(cursor.fname)
        print("Appending {0} on the right.".format(cursor.timestr))
        frames.append(hour_file.open_dirty())
        calib_found = any(hour_file.open().is_calib)

    combined = pd.concat(list(frames))
    result = prepare_data(combined)
    define_sdtype(result)
    return result
def folder_to_df(folder, top_end=None, verbose=False):
    """Read the ``*.div247`` files of a folder into one dataframe.

    Each file is converted with ``fname_to_df`` and passed through
    ``prepare_data`` and ``define_sdtype``.  From the second file on, every
    ``*_labels`` column is shifted by the maximum that column reached in the
    preceding (already shifted) file.

    Parameters
    ----------
    folder : str
        Directory searched (non-recursively) for ``*.div247`` files.
    top_end : int, optional
        Read only the first `top_end` files in sorted order.  A falsy value
        (``None`` or ``0``) means all files.
    verbose : bool
        If true, print a progress percentage before each file.

    Returns
    -------
    pandas.DataFrame
        The rows of all files read that have ``calib_block_labels > 0``.
        An empty dataframe is returned when no file was read.
    """
    rec_dtype, keys = get_div247_dtypes()
    fnames = sorted(glob.glob(folder + '/*.div247'))
    if not top_end:
        top_end = len(fnames)

    # Collect the per-file frames and concatenate once at the end; growing a
    # frame with pd.concat inside the loop recopies all earlier rows on
    # every iteration.
    frames = []
    # Per-column maximum of the previous file's label columns.  Only these
    # scalars are needed for the next file, not a copy of the whole frame.
    label_offsets = None
    for i, fname in enumerate(fnames[:top_end]):
        if verbose:
            print(round(float(i) * 100 / top_end, 1), '%')
        df = fname_to_df(fname, rec_dtype, keys)
        df = prepare_data(df)
        define_sdtype(df)
        label_cols = list(df.filter(regex='_labels'))
        if label_offsets is not None:
            for col in label_cols:
                df[col] += label_offsets[col]
        label_offsets = {col: df[col].max() for col in label_cols}
        frames.append(df)

    if not frames:
        # Nothing was read: return an empty frame instead of failing on the
        # missing `calib_block_labels` column.
        return pd.DataFrame()

    dfall = pd.concat(frames)
    return dfall[dfall.calib_block_labels > 0]
def folder_to_store(folder):
    """Convert the ``*.div247`` files of a folder into one HDF store.

    The store path comes from ``get_storename(folder)`` and is opened in
    overwrite mode.  Each file is converted with ``fname_to_df`` and passed
    through ``prepare_data`` and ``define_sdtype``; only rows with
    ``calib_block_labels > 0`` are appended under the key ``'df'``.  Files
    without such rows are skipped.  From the second stored file on, every
    ``*_labels`` column is shifted by the maximum that column reached in the
    previously stored (already shifted) rows.

    Prints progress to stdout and returns ``None``.  If appending to the
    store fails, the error is printed and the function returns early.  The
    store is closed on every exit path.
    """
    rec_dtype, keys = get_div247_dtypes()
    fnames = sorted(glob.glob(folder + '/*.div247'))
    if not fnames:
        print("Found no files.")
        return

    storename = get_storename(folder)
    print(storename)
    nfiles = len(fnames)
    cols = [
        'calib_block_labels', 'sv_block_labels', 'bb_block_labels',
        'st_block_labels', 'is_spaceview', 'is_bbview', 'is_stview',
        'is_moving', 'is_stowed', 'is_calib',
    ]
    # Per-column maximum of the label columns of the last stored frame.
    label_offsets = None

    # mode='w' overwrites an existing store.  The with-block guarantees the
    # store is closed even when reading or preparing a file raises.
    with pd.HDFStore(storename, mode='w') as store:
        for i, fname in enumerate(fnames):
            print(round(float(i) * 100 / nfiles, 1), '%')
            df = fname_to_df(fname, rec_dtype, keys)
            df = prepare_data(df)
            define_sdtype(df)
            # Explicit copy: the label columns are modified below, which
            # must not be a chained assignment into a slice of `df`.
            to_store = df[df.calib_block_labels > 0].copy()
            if len(to_store) == 0:
                continue
            label_cols = list(to_store.filter(regex='_labels'))
            if label_offsets is not None:
                for col in label_cols:
                    to_store[col] += label_offsets[col]
            label_offsets = {col: to_store[col].max() for col in label_cols}
            try:
                store.append('df', to_store, data_columns=cols)
            except Exception as e:
                # Best effort: report which file failed and stop.
                print('at', fname)
                print('something went wrong at appending into store.')
                print(e)
                return
        print("Done.")
def folder_to_store(folder):
    """Convert the ``*.div247`` files of a folder into one HDF store.

    The store path comes from ``get_storename(folder)`` and is opened in
    overwrite mode.  Each file is converted with ``fname_to_df`` and passed
    through ``prepare_data`` and ``define_sdtype``; only rows with
    ``calib_block_labels > 0`` are appended under the key ``'df'``.  Files
    without such rows are skipped.  From the second stored file on, every
    ``*_labels`` column is shifted by the maximum that column reached in the
    previously stored (already shifted) rows.

    Prints progress to stdout and returns ``None``.  If appending to the
    store fails, the error is printed and the function returns early.  The
    store is closed on every exit path.
    """
    rec_dtype, keys = get_div247_dtypes()
    fnames = sorted(glob.glob(folder + '/*.div247'))
    if not fnames:
        print("Found no files.")
        return

    storename = get_storename(folder)
    print(storename)
    nfiles = len(fnames)
    cols = ['calib_block_labels', 'sv_block_labels', 'bb_block_labels',
            'st_block_labels', 'is_spaceview', 'is_bbview', 'is_stview',
            'is_moving', 'is_stowed', 'is_calib']
    # Per-column maximum of the label columns of the last stored frame.
    label_offsets = None

    # mode='w' overwrites an existing store.  The with-block guarantees the
    # store is closed even when reading or preparing a file raises.
    with pd.HDFStore(storename, mode='w') as store:
        for i, fname in enumerate(fnames):
            print(round(float(i) * 100 / nfiles, 1), '%')
            df = fname_to_df(fname, rec_dtype, keys)
            df = prepare_data(df)
            define_sdtype(df)
            # Explicit copy: the label columns are modified below, which
            # must not be a chained assignment into a slice of `df`.
            to_store = df[df.calib_block_labels > 0].copy()
            if len(to_store) == 0:
                continue
            label_cols = list(to_store.filter(regex='_labels'))
            if label_offsets is not None:
                for col in label_cols:
                    to_store[col] += label_offsets[col]
            label_offsets = {col: to_store[col].max() for col in label_cols}
            try:
                store.append('df', to_store, data_columns=cols)
            except Exception as e:
                # Best effort: report which file failed and stop.
                print('at', fname)
                print('something went wrong at appending into store.')
                print(e)
                return
        print("Done.")
def clean_final_df(self, df):
    """Run ``prepare_data`` and ``define_sdtype`` on `df`.

    Returns the frame produced by ``prepare_data``.
    """
    prepared = prepare_data(df)
    # Called for its effect on `prepared`; its return value is not used.
    define_sdtype(prepared)
    return prepared
def clean(self):
    """Replace ``self.df`` with its prepared version.

    ``self.df`` is passed through ``prepare_data``, the result is handed to
    ``define_sdtype`` and then stored back on the instance.
    """
    prepared = prepare_data(self.df)
    define_sdtype(prepared)
    # Assigned last, so `self.df` is untouched if a step above raises.
    self.df = prepared
def clean_final_df(self, df):
    """Prepare the final dataframe and define its sdtypes.

    The sdtypes are defined here because that step has to wait until the
    final dataframe is available.

    Returns the frame produced by ``prepare_data``.
    """
    final_df = prepare_data(df)
    # Called for its effect on `final_df`; its return value is not used.
    define_sdtype(final_df)
    return final_df
def open_and_process(self):
    """Load ``self.fname`` and store the prepared frame on ``self.df``.

    The file is read with ``fname_to_df`` using ``self.rec_dtype`` and
    ``self.keys``, then passed through ``prepare_data`` and
    ``define_sdtype``.
    """
    raw = fname_to_df(self.fname, self.rec_dtype, self.keys)
    processed = prepare_data(raw)
    define_sdtype(processed)
    # Assigned last, so `self.df` is untouched if a step above raises.
    self.df = processed