Exemple #1
0
def preprocess_all_mk1(norm_wind=None,
                       div_factor=300,
                       mode='train',
                       disp=True):
    """Scale every CSV file and apply a running zero-mean.

    For each path returned by ``io.get_file_list(mode=mode, fullpath=True)``
    the file is read with pandas, its first column is dropped, the remaining
    values are divided by ``div_factor`` and passed to
    ``utils.running_zeromean`` along axis 0.  The result is stored with
    ``np.save`` next to the source file under the name
    ``<path without extension>_mk1_norm<WIND>`` (``np.save`` appends
    ``.npy``), where ``<WIND>`` is ``FULL`` when the window equals the number
    of rows in the file and the window length otherwise.

    Args:
        norm_wind: Window length handed to ``utils.running_zeromean``.
            ``None`` means "use the whole file" (window = number of rows).
        div_factor: Divisor applied to every value before normalisation.
        mode: Forwarded to ``io.get_file_list`` to select the file set.
        disp: When true, progress messages are emitted through
            ``printflush``; when false nothing is printed.

    Returns:
        None.  The function is used for its side effect of writing one
        ``.npy`` file per input CSV.
    """
    csvlist = io.get_file_list(mode=mode, fullpath=True)

    def pif(msg):
        # Print-if-display: silent unless the caller asked for output.
        if disp:
            printflush(msg)

    pif('MK1 preprocessing')

    for fullpath in csvlist:
        t0 = time()
        fname = os.path.basename(fullpath)
        data = pd.read_csv(fullpath).values[:, 1:]
        n_samples = data.shape[0]
        pif('Processing ' + fname + ' -- ' + str(n_samples) + ' samples...')

        # Scale the data.  The division is done out of place on purpose: an
        # in-place ``/=`` cannot store float results in an integer array
        # (NumPy refuses the cast), which is what an all-integer CSV yields.
        data = data / float(div_factor)

        # Execute the running mean over the requested window, or over the
        # whole recording when no window was given.
        wind = norm_wind if norm_wind is not None else n_samples
        final_data = utils.running_zeromean(data, wind, axis=0)
        pif("\b" + "%.3f" % (time() - t0) + " s\n")

        str_wind = 'FULL' if wind == n_samples else str(wind)
        # splitext strips the real extension instead of assuming it is
        # exactly four characters long.
        final_fname = os.path.splitext(fullpath)[0] + '_mk1_norm' + str_wind
        np.save(final_fname, final_data)
Exemple #2
0
def preprocess_all_mk0(norm_wind=None,
                       nperseg=256,
                       mode='train',
                       max_freq_count=10,
                       disp=True):
    """Append per-window principal-frequency features to zero-meaned data.

    For each path returned by ``io.get_file_list(mode=mode, fullpath=True)``:

    1. The CSV is read with pandas and its first column is dropped.
    2. ``utils.spectrogram`` is computed with a boxcar window of ``nperseg``
       samples and ``principal_frequencies`` extracts ``max_freq_count``
       features per spectrogram window.
    3. The feature rows are shifted down by one window (a zero row is
       inserted in front) and each row is repeated over a block of
       ``nperseg`` samples, so block ``k`` carries the features of window
       ``k - 1``.  The first block and any samples past the last filled
       block stay zero.
       NOTE(review): this lines up with the spectrogram windows only if
       ``utils.spectrogram`` uses non-overlapping segments -- confirm.
    4. The raw data is passed through ``utils.running_zeromean`` along
       axis 0 and the repeated features are appended as extra columns.
    5. The result is stored with ``np.save`` under
       ``<path without extension>_mk0_W<nperseg>_norm<WIND>`` (``np.save``
       appends ``.npy``), where ``<WIND>`` is ``FULL`` when the window equals
       the number of rows and the window length otherwise.

    Args:
        norm_wind: Window length handed to ``utils.running_zeromean``.
            ``None`` means "use the whole file" (window = number of rows).
        nperseg: Segment length for the spectrogram and the number of
            samples each feature row is repeated over.
        mode: Forwarded to ``io.get_file_list`` to select the file set.
        max_freq_count: Number of principal-frequency features requested
            per window, and the number of columns appended to the data.
        disp: When true, progress messages are emitted through
            ``printflush``; when false nothing is printed.

    Returns:
        None.  The function is used for its side effect of writing one
        ``.npy`` file per input CSV.
    """
    csvlist = io.get_file_list(mode=mode, fullpath=True)

    def pif(msg):
        # Print-if-display: silent unless the caller asked for output.
        if disp:
            printflush(msg)

    pif('MK0 preprocessing')

    for fullpath in csvlist:
        t0 = time()
        fname = os.path.basename(fullpath)
        data = pd.read_csv(fullpath).values[:, 1:]
        n_samples = data.shape[0]
        pif('Processing ' + fname + ' -- ' + str(n_samples) + ' samples...')

        # Get the spectrogram; only the power array is needed here.
        _, _, sxx = utils.spectrogram(data, window='boxcar', nperseg=nperseg)

        # N principal frequencies per window (a normalized index, per the
        # original author's note).
        max_freqs = principal_frequencies(sxx, max_freq_count)

        # Blow up the per-window features to match the data array.  A zero
        # row is inserted first so every block receives the features of the
        # *previous* window.
        repeated_max_freqs = np.zeros((n_samples, max_freq_count),
                                      dtype=max_freqs.dtype)
        zero_row = np.zeros((1, max_freqs.shape[1]))
        max_freqs = np.insert(max_freqs, 0, zero_row, axis=0)
        for k in range(max_freqs.shape[0] - 1):
            start = k * nperseg
            if start >= n_samples:
                # More windows than sample blocks: nothing left to fill.
                break
            # Broadcasting a single row fills the slice whatever its length,
            # so a trailing block shorter than nperseg is handled too.
            repeated_max_freqs[start:start + nperseg, :] = max_freqs[k, :]

        # Execute the running mean over the requested window, or over the
        # whole recording when no window was given.
        wind = norm_wind if norm_wind is not None else n_samples
        norm_data = utils.running_zeromean(data, wind, axis=0)
        pif("\b" + "%.3f" % (time() - t0) + " s\n")

        # Concatenate the normalised signal with the frequency features.
        final_data = np.append(norm_data, repeated_max_freqs, axis=1)

        str_wind = 'FULL' if wind == n_samples else str(wind)
        # splitext strips the real extension instead of assuming it is
        # exactly four characters long.
        final_fname = (os.path.splitext(fullpath)[0] + '_mk0' + '_W'
                       + str(nperseg) + '_norm' + str_wind)
        np.save(final_fname, final_data)