def preprocess_all_mk1(norm_wind=None, div_factor=300, mode='train', disp=True):
    """Scale every data file by ``div_factor`` and apply a running zero-mean.

    For each path returned by ``io.get_file_list(mode=mode, fullpath=True)``
    the CSV is loaded, its first column is dropped, the remaining values are
    divided by ``div_factor`` and passed to ``utils.running_zeromean`` along
    axis 0.  The result is written with ``np.save`` to
    ``<input path without extension>_mk1_norm<window>``, where ``<window>``
    is ``FULL`` when the window equals the number of rows of the file and
    the window length otherwise.

    Args:
        norm_wind: Window length handed to ``utils.running_zeromean``.  When
            ``None``, the number of rows of the current file is used.
        div_factor: Divisor applied to every value before normalisation.
        mode: Forwarded to ``io.get_file_list`` to select the files.
        disp: When true, progress messages are emitted through
            ``printflush``; otherwise nothing is printed.

    Returns:
        None.  The output is the set of files written by ``np.save``.
    """
    def _report(msg):
        # Progress output is optional; stay silent when disp is false.
        if disp:
            printflush(msg)

    csvlist = io.get_file_list(mode=mode, fullpath=True)
    _report('MK1 preprocessing')

    for fullpath in csvlist:
        t0 = time()
        fname = os.path.basename(fullpath)
        # Drop the first column of the CSV; only the remaining columns are
        # processed.
        data = pd.read_csv(fullpath).values[:, 1:]
        n_samples = data.shape[0]
        _report('Processing %s -- %s samples...' % (fname, str(n_samples)))

        # Scale the data.  Out-of-place division lets NumPy promote
        # integer-typed input to float; an in-place ``/=`` cannot store the
        # float result back into an integer array.
        data = data / float(div_factor)

        # Execute the running mean over the requested window, or over the
        # whole file when no window was given.
        wind = n_samples if norm_wind is None else norm_wind
        final_data = utils.running_zeromean(data, wind, axis=0)
        _report("\b%.3f s\n" % (time() - t0))

        str_wind = 'FULL' if wind == n_samples else str(wind)
        # splitext removes the extension whatever its length, instead of
        # assuming exactly four trailing characters.
        base_path = os.path.splitext(fullpath)[0]
        np.save(base_path + '_mk1_norm' + str_wind, final_data)
def preprocess_all_mk0(norm_wind=None, nperseg=256, mode='train', max_freq_count=10, disp=True):
    """Append lagged principal-frequency columns to zero-meaned data files.

    For each path returned by ``io.get_file_list(mode=mode, fullpath=True)``:

    1. The CSV is loaded and its first column is dropped.
    2. ``utils.spectrogram`` is computed with a boxcar window of ``nperseg``
       samples and ``principal_frequencies`` reduces it to one row of
       ``max_freq_count`` values per window.
    3. Each row is repeated ``nperseg`` times and shifted by one window, so
       that rows ``k*nperseg .. (k+1)*nperseg - 1`` of the output carry the
       values of window ``k - 1``.  The first ``nperseg`` rows are zero.
    4. The data is passed through ``utils.running_zeromean`` along axis 0 and
       the frequency columns are appended to it column-wise.
    5. The result is written with ``np.save`` to
       ``<input path without extension>_mk0_W<nperseg>_norm<window>``, where
       ``<window>`` is ``FULL`` when the window equals the number of rows of
       the file and the window length otherwise.

    Args:
        norm_wind: Window length handed to ``utils.running_zeromean``.  When
            ``None``, the number of rows of the current file is used.
        nperseg: Spectrogram segment length, also the number of rows each
            set of principal frequencies is repeated over.
        mode: Forwarded to ``io.get_file_list`` to select the files.
        max_freq_count: Number of frequency columns appended per row.
        disp: When true, progress messages are emitted through
            ``printflush``; otherwise nothing is printed.

    Returns:
        None.  The output is the set of files written by ``np.save``.
    """
    def _report(msg):
        # Progress output is optional; stay silent when disp is false.
        if disp:
            printflush(msg)

    csvlist = io.get_file_list(mode=mode, fullpath=True)
    _report('MK0 preprocessing')

    for fullpath in csvlist:
        t0 = time()
        fname = os.path.basename(fullpath)
        # Drop the first column of the CSV; only the remaining columns are
        # processed.
        data = pd.read_csv(fullpath).values[:, 1:]
        n_samples = data.shape[0]
        _report('Processing %s -- %s samples...' % (fname, str(n_samples)))

        # Get the spectrogram; only the spectrogram itself is used here.
        _, _, sxx = utils.spectrogram(data, window='boxcar', nperseg=nperseg)

        # N principal frequencies, one row per spectrogram window.
        # NOTE(review): the original comment calls these "a normalized
        # index" -- confirm against principal_frequencies.
        max_freqs = principal_frequencies(sxx, max_freq_count)

        # Prepend a zero row so that every segment receives the values of
        # the *previous* window; the first segment therefore stays zero.
        lagged = np.insert(
            max_freqs, 0, np.zeros((1, max_freqs.shape[1])), axis=0)

        # Blow up the per-window rows to one row per sample: every row except
        # the last is repeated nperseg times, in window order.
        # NOTE(review): rows after the last full segment are left at zero and
        # the values of the final window are never used -- confirm this is
        # intended.
        repeated_max_freqs = np.zeros(
            (n_samples, max_freq_count), dtype=max_freqs.dtype)
        expanded = np.repeat(lagged[:-1], nperseg, axis=0)
        repeated_max_freqs[:expanded.shape[0], :] = expanded

        # Execute the running mean over the requested window, or over the
        # whole file when no window was given.
        wind = n_samples if norm_wind is None else norm_wind
        norm_data = utils.running_zeromean(data, wind, axis=0)
        _report("\b%.3f s\n" % (time() - t0))

        # Concatenate the normalised data and the frequency columns.
        final_data = np.append(norm_data, repeated_max_freqs, axis=1)

        str_wind = 'FULL' if wind == n_samples else str(wind)
        # splitext removes the extension whatever its length, instead of
        # assuming exactly four trailing characters.
        base_path = os.path.splitext(fullpath)[0]
        final_fname = base_path + '_mk0_W' + str(nperseg) + '_norm' + str_wind
        np.save(final_fname, final_data)