コード例 #1
0
ファイル: run_test.py プロジェクト: pglushkov/py_snd_utils
def tst_sig_chunks():
    """Print how a random signal is chunked for four argument combinations.

    Builds a random (1, 13) signal and passes it to
    ``utils_sig.cut_sig_into_chunks`` with a chunk size of 8, printing the
    shape and the contents of the signal and of every result.
    """
    n_samples = 13
    chunk_size = 8
    olap = 4

    sig = numpy.random.randn(1, n_samples)
    for item in (sig.shape, sig):
        print(item)

    # Keyword arguments for cases 0..3, in the order they are exercised:
    #   0: explicit overlap, pad_zeros=False
    #   1: explicit overlap, default pad_zeros
    #   2: all defaults
    #   3: default overlap, pad_zeros=False
    case_kwargs = (
        {'overlap_size': olap, 'pad_zeros': False},
        {'overlap_size': olap},
        {},
        {'pad_zeros': False},
    )
    for kwargs in case_kwargs:
        res = utils_sig.cut_sig_into_chunks(sig, chunk_size, **kwargs)
        print(res.shape)
        print(res)
コード例 #2
0
def run_main():
    """Dump two spectrogram variants of a wav-file to ``./tmp`` for comparison.

    The wav-file path is taken from ``sys.argv[1]``.  The signal is padded to
    a multiple of the FFT size (256), cut into chunks of that size and passed
    to ``utils_sp.get_spec_envelopes``; the same padded signal is also fed to
    ``sig.spectrogram`` with a boxcar window and no overlap.

    Writes:
        ./tmp/py_sgram.bin -- transposed output of ``sig.spectrogram``
        ./tmp/my_sgram.bin -- output of ``utils_sp.get_spec_envelopes``

    Raises:
        Exception: if no file was given on the command line or the given
            file does not exist.
    """
    if len(sys.argv) <= 1:
        raise Exception("Need to specify input wav-file to process")

    wavname = sys.argv[1]

    if not os.path.exists(wavname):
        raise Exception(
            "Specified wavfile {0} does not seem to exist!".format(wavname))

    print("Will process file {0}".format(wavname))

    (samplerate, signal) = wav.read(wavname)
    signal = signal.reshape((-1, 1))

    fft_size = 256

    signal = utils_sig.pad_to_multiple_of(signal, fft_size, 0.0)
    sigchunks = utils_sig.cut_sig_into_chunks(signal.T, fft_size)
    spec_envs = utils_sp.get_spec_envelopes(sigchunks)

    # Only the spectrogram itself is needed; the frequency and time grids
    # returned alongside it are discarded.
    _, _, sgram = sig.spectrogram(signal.squeeze(),
                                  fs=samplerate,
                                  window=sig.get_window('boxcar', fft_size),
                                  nperseg=fft_size,
                                  noverlap=0,
                                  nfft=fft_size,
                                  scaling='spectrum',
                                  mode='magnitude')

    # Transpose before dumping, as the original comparison did.
    sgram = sgram.T

    # Make sure the output directory exists so the dumps below do not fail
    # after all the processing has already been done.
    os.makedirs('./tmp', exist_ok=True)

    sgram.tofile('./tmp/py_sgram.bin')
    spec_envs.tofile('./tmp/my_sgram.bin')
コード例 #3
0
def run_main_sgram_env():
    """Plot a wav signal together with a curve derived from its filterbank.

    The wav-file path is taken from ``sys.argv[1]``.  The signal is padded to
    a multiple of the FFT size (64), cut into chunks, converted to spectral
    envelopes and then to mel filterbank curves (15 filters) with the
    project's ``utils_sp`` helpers.  The filterbank curves are passed to
    ``estimate_sc_from_envelopes`` and the first returned curve is plotted
    against the signal with ``utils_plot.plot_curves``.

    Raises:
        Exception: if no file was given on the command line or the given
            file does not exist.
    """
    if len(sys.argv) <= 1:
        raise Exception("Need to specify input wav-file to process")

    wavname = sys.argv[1]

    if not os.path.exists(wavname):
        raise Exception(
            "Specified wavfile {0} does not seem to exist!".format(wavname))

    print("Will process file {0}".format(wavname))

    (samplerate, signal) = wav.read(wavname)
    sampleperiod = 1.0 / samplerate
    signal = signal.reshape((-1, 1))

    fft_size = 64
    nfilters = 15

    signal = utils_sig.pad_to_multiple_of(signal, fft_size, 0.0)
    sigchunks = utils_sig.cut_sig_into_chunks(signal.T, fft_size)
    spec_envs = utils_sp.get_spec_envelopes(sigchunks)
    fbank_envs = utils_sp.get_mel_fb_curves(spec_envs, samplerate, nfilters)

    # NOTE(review): the reference filterbank below is computed but not used
    # afterwards in this function; the call is kept so behaviour is unchanged.
    timestep = float(fft_size) / float(samplerate)
    (fbank_envs_py, _) = psf.fbank(signal,
                                   samplerate=samplerate,
                                   winlen=timestep,
                                   winstep=timestep,
                                   nfilt=nfilters,
                                   nfft=fft_size,
                                   lowfreq=0,
                                   highfreq=None,
                                   preemph=0)

    # Build the time axis from an integer sample index.  A float-step
    # numpy.arange(0, duration, period) may come out one element longer than
    # the signal because of rounding, which would break the plot below.
    SIG_X = numpy.arange(signal.shape[0]) * sampleperiod

    dfb, D_FB_X, _ = estimate_sc_from_envelopes(fbank_envs, samplerate,
                                                fft_size)

    utils_plot.plot_curves([signal, dfb], [SIG_X, D_FB_X])
コード例 #4
0
def run_main():
    """Dump per-frame crest values and spectral flatness of a wav-file.

    The wav-file path is taken from ``sys.argv[1]``.  Crest values are
    computed by ``utils_td.get_crest_from_chunks`` over chunks of 2048
    samples of the zero-padded signal.  Flatness is computed by
    ``utils_sp.calc_spec_gram_flatness`` from a ``sig.spectrogram`` of the
    unpadded signal (boxcar window, 1024-sample segments, no overlap).

    Writes:
        ./tmp/crest_vals.bin    -- crest values
        ./tmp/flatness_vals.bin -- flatness values

    Raises:
        Exception: if no file was given on the command line or the given
            file does not exist.
    """
    if len(sys.argv) <= 1:
        raise Exception("Need to specify input wav-file to process")

    wavname = sys.argv[1]

    if not os.path.exists(wavname):
        raise Exception(
            "Specified wavfile {0} does not seem to exist!".format(wavname))

    print("Will process file : {0}".format(wavname))

    (samplerate, signal) = wav.read(wavname)
    signal = signal.reshape((-1, 1))

    crest_frame_size = 2048

    sflat_frame_size = 1024
    # FFT size is the next power of two that is >= the frame size.
    sflat_fft_size = int(2**numpy.ceil(numpy.log2(sflat_frame_size)))

    signal_for_crest = utils_sig.pad_to_multiple_of(signal, crest_frame_size,
                                                    0.0)
    crestchunks = utils_sig.cut_sig_into_chunks(signal_for_crest.T,
                                                crest_frame_size)
    crestfactor_vals = utils_td.get_crest_from_chunks(crestchunks)

    # Only the spectrogram itself is needed; the frequency and time grids
    # returned alongside it are discarded.
    _, _, sgram = sig.spectrogram(signal.squeeze(),
                                  fs=samplerate,
                                  window=sig.get_window('boxcar',
                                                        sflat_frame_size),
                                  nperseg=sflat_frame_size,
                                  noverlap=0,
                                  nfft=sflat_fft_size,
                                  scaling='spectrum',
                                  mode='magnitude')

    sgram = sgram.T

    flatness = utils_sp.calc_spec_gram_flatness(sgram)

    # Make sure the output directory exists so the dumps below do not fail
    # after all the processing has already been done.
    os.makedirs('./tmp', exist_ok=True)

    crestfactor_vals.tofile('./tmp/crest_vals.bin')
    flatness.tofile('./tmp/flatness_vals.bin')
コード例 #5
0
def run_main():
    """Dump two mel filterbank variants of a wav-file to ``./tmp``.

    The wav-file path is taken from ``sys.argv[1]``.  The signal is padded to
    a multiple of the FFT size (256) and processed twice with 15 filters:
    once through the project's ``utils_sp`` helpers and once through
    ``psf.fbank`` with window length and step both equal to one FFT frame.
    Shapes and dtypes of both results are printed before they are saved.

    Writes:
        ./tmp/my_fbank.bin -- output of ``utils_sp.get_mel_fb_curves``
        ./tmp/py_fbank.bin -- first value returned by ``psf.fbank``

    Raises:
        Exception: if no file was given on the command line or the given
            file does not exist.
    """
    if len(sys.argv) <= 1:
        raise Exception("Need to specify input wav-file to process")

    wavname = sys.argv[1]

    if not os.path.exists(wavname):
        raise Exception(
            "Specified wavfile {0} does not seem to exist!".format(wavname))

    print("Will process file {0}".format(wavname))

    (samplerate, signal) = wav.read(wavname)
    signal = signal.reshape((-1, 1))

    fft_size = 256
    nfilters = 15

    signal = utils_sig.pad_to_multiple_of(signal, fft_size, 0.0)
    sigchunks = utils_sig.cut_sig_into_chunks(signal.T, fft_size)
    spec_envs = utils_sp.get_spec_envelopes(sigchunks)
    fbank_envs = utils_sp.get_mel_fb_curves(spec_envs, samplerate, nfilters)

    # One analysis window per FFT frame, expressed in seconds.
    timestep = float(fft_size) / float(samplerate)
    (fbank_envs_py, _) = psf.fbank(signal,
                                   samplerate=samplerate,
                                   winlen=timestep,
                                   winstep=timestep,
                                   nfilt=nfilters,
                                   nfft=fft_size,
                                   lowfreq=0,
                                   highfreq=None,
                                   preemph=0)

    print(fbank_envs.shape)
    print(fbank_envs_py.shape)

    print(fbank_envs.dtype)
    print(fbank_envs_py.dtype)

    # Make sure the output directory exists so the dumps below do not fail
    # after all the processing has already been done.
    os.makedirs('./tmp', exist_ok=True)

    fbank_envs.tofile('./tmp/my_fbank.bin')
    fbank_envs_py.tofile('./tmp/py_fbank.bin')
コード例 #6
0
def run_emp_detect_type2(wavfile, config, silent=True):
    """Build a per-sample detection mask from pitch and peak/change features.

    NOTE: the original author marked this detector as "currently not
    implemented"; treat its output as experimental.

    Three per-sample detections are combined by element-wise product:
    a thresholded log-ratio of peak-to-peak level to spectral change,
    a voiced mask (f0 > 0), and the normalized f0 extreme areas.

    Args:
        wavfile: path of the wav-file to analyse.
        config: mapping read for the keys 'chunk_size_samples',
            'overlap_samples', 'wrk_path', 'reaper_path', 'world_path',
            'f0_time_step', 'f0_extr_thr', 'f0_extr_len',
            'spec_change_band_st', 'spec_change_band_end' and
            'peak2change_thr'.
        silent: currently unused.

    Returns:
        Tuple ``(RESULT_MASK, signal_time, dbg_stuff)`` where ``RESULT_MASK``
        is the combined per-sample mask, ``signal_time`` is the time of each
        sample in seconds and ``dbg_stuff`` is a dict with the intermediate
        per-sample curves.
    """
    (samplerate, signal) = wav.read(wavfile)
    # Remove the DC offset: some inputs are badly biased.
    signal = signal - numpy.mean(signal)
    sampleperiod = 1.0 / samplerate
    signal_time = numpy.arange(len(signal)) * sampleperiod
    signal = signal.reshape((-1, 1))
    # presumably scales 16-bit PCM samples to [-1, 1) -- TODO confirm
    signal = signal / (2.0**15.0)
    signal_no_sil = utils_td.remove_silence(signal, 0.0001)
    std_no_sil = numpy.std(signal_no_sil)
    rms_no_sil = utils_td.get_rms(signal_no_sil)  # not used below

    chunk_nsamples = int(config['chunk_size_samples'])
    olap_nsamples = int(config['overlap_samples'])

    # FFT size is the next power of two >= the chunk size; env_size is the
    # number of bins of the corresponding one-sided spectrum.
    fft_size = int(2**numpy.ceil(numpy.log2(chunk_nsamples)))
    env_size = int(fft_size / 2 + 1)

    sig_chunks = utils_sig.cut_sig_into_chunks(signal.T,
                                               chunk_nsamples,
                                               overlap_step=olap_nsamples,
                                               pad_zeros=True)
    sig_chunks_num = sig_chunks.shape[0]
    sig_chunks_tstep = olap_nsamples / samplerate
    sig_chunks_time = numpy.arange(sig_chunks_num) * sig_chunks_tstep

    wrld_res = run_world_by_reaper(wavfile, config['wrk_path'],
                                   config['reaper_path'], config['world_path'])

    sig_f0 = numpy.fromfile(wrld_res[0]).reshape((-1, 1))
    sig_sp = numpy.fromfile(wrld_res[1]).reshape((env_size, -1))
    sig_f0_time = numpy.arange(sig_f0.shape[0]) * config['f0_time_step']
    sig_f0_no_sil = utils_td.remove_silence(sig_f0, 0.0)
    f0_std_no_sil = numpy.std(sig_f0_no_sil)  # not used below

    ### =================== PITCH EXTREMUMS
    f0_extr = utils_pitch.get_f0_extreme_areas(
        sig_f0, config['f0_extr_thr'],
        config['f0_extr_len'] / config['f0_time_step'])
    # Keep only the positive part of each of the two returned curves.
    f0_low = f0_extr[0] * (f0_extr[0] > 0).astype('int')
    f0_high = f0_extr[1] * (f0_extr[1] > 0).astype('int')
    f0_extr = utils_td.perform_mvn_norm((f0_low + f0_high), skip_zeros=True)

    ### =================== SPECTRAL CHANGE
    # Band limits come from the `config` argument, like every other setting
    # in this function (and like run_emp_detect_type1), and are converted
    # from Hz to FFT bin indices.
    freq_step = samplerate / fft_size
    band_idx = [
        int(numpy.round(config['spec_change_band_st'] / freq_step)),
        int(numpy.round(config['spec_change_band_end'] / freq_step))
    ]
    (sc_dfb, sc_time) = estimate_sc_from_envelopes(sig_sp.T,
                                                   samplerate,
                                                   0.005 * samplerate,
                                                   band=band_idx)

    ### =================== PEAK-to_PEAK
    p2p = utils_td.get_peak_to_peak_from_chunks(sig_chunks)

    ### =================== VOICED MASK
    voiced = (sig_f0.squeeze() > 0.0).astype('int')

    ### =================== PEAK-TO-CHANGE
    # Bring both curves onto the per-sample time grid, then take the
    # log-ratio; non-finite results are handled by clean_undef_floats.
    p2p_int = numpy.interp(signal_time.squeeze(), sig_chunks_time.squeeze(),
                           p2p.squeeze())
    sc_dfb_int = numpy.interp(signal_time.squeeze(), sc_time.squeeze(),
                              sc_dfb.squeeze())
    p2sc = numpy.log(p2p_int) - numpy.log(sc_dfb_int)
    p2sc = utils_sig.clean_undef_floats(p2sc)

    ### =================== FINALIZING RESULTS
    DETECT_VO = numpy.interp(signal_time.squeeze(), sig_f0_time.squeeze(),
                             voiced.squeeze())
    DETECT_EX = numpy.interp(signal_time.squeeze(), sig_f0_time.squeeze(),
                             f0_extr.squeeze())
    DETECT_P2SC = p2sc > config['peak2change_thr']

    RESULT_MASK = (DETECT_P2SC > 0) * (DETECT_VO > 0) * (DETECT_EX > 0)

    dbg_stuff = {
        'peak2schange_detect': DETECT_P2SC,
        'voiced_detect': DETECT_VO,
        'f0-extreme_detect': DETECT_EX,
        'threshold_base': std_no_sil,
        # The per-sample interpolations computed above are reused here
        # instead of being recomputed with identical arguments.
        'spec_change': sc_dfb_int,
        'peak2peak': p2p_int,
        'peak2schange': p2sc
    }

    return (RESULT_MASK, signal_time, dbg_stuff)
コード例 #7
0
def run_emp_detect_type1(wavfile, config, silent=True, reuse_data=False):
    """Build a per-sample detection mask and scan regions for a wav-file.

    Four per-sample detections are combined by element-wise product:
      1) spectral change below a threshold scaled by the signal std,
      2) peak-to-peak level above a threshold scaled by the signal std,
      3) voiced mask (f0 > 0),
      4) normalized f0 extreme areas.
    The combined mask is post-processed by ``update_detection_results``,
    scan regions are positioned on it, and finally the mask is multiplied by
    the voiced mask once more.

    Args:
        wavfile: path of the wav-file to analyse.
        config: mapping read for the keys 'chunk_size_samples',
            'overlap_samples', 'wrk_path', 'reaper_path', 'world_path',
            'f0_time_step', 'f0_extr_thr', 'f0_extr_len',
            'spec_change_band_st', 'spec_change_band_end',
            'spec_change_threshold', 'peak_to_peak_thr_std',
            'detect_hysteresis', 'detect_merge_threshold', 'detect_min_len',
            'detect_max_len', 'scan_region_len' and 'detect_scan_min_olap'.
        silent: currently unused.
        reuse_data: when true, look up previously calculated WORLD results
            in ``config['wrk_path']`` instead of running the analysis again.

    Returns:
        Tuple ``(RESULT_MASK, signal_time, scan_segs, dbg_stuff)`` where
        ``signal_time`` is the time of each sample in seconds, ``scan_segs``
        is the value returned by ``position_scan_regions`` and ``dbg_stuff``
        is a dict with the intermediate per-sample curves.

    Raises:
        Exception: if any of the first three WORLD result entries is None.
    """
    (samplerate, signal) = wav.read(wavfile)
    # Remove the DC offset: some inputs are badly biased.
    signal = signal - numpy.mean(signal)
    sampleperiod = 1.0 / samplerate
    signal_time = numpy.arange(len(signal)) * sampleperiod
    signal = signal.reshape((-1, 1))
    # presumably scales 16-bit PCM samples to [-1, 1) -- TODO confirm
    signal = signal / (2.0**15.0)
    signal_no_sil = utils_td.remove_silence(signal, 0.0001)
    std_no_sil = numpy.std(signal_no_sil)
    rms_no_sil = utils_td.get_rms(signal_no_sil)  # not used below

    chunk_nsamples = int(config['chunk_size_samples'])
    olap_nsamples = int(config['overlap_samples'])

    # FFT size is the next power of two >= the chunk size; env_size is the
    # number of bins of the corresponding one-sided spectrum.
    fft_size = int(2**numpy.ceil(numpy.log2(chunk_nsamples)))
    env_size = int(fft_size / 2 + 1)

    sig_chunks = utils_sig.cut_sig_into_chunks(signal.T,
                                               chunk_nsamples,
                                               overlap_step=olap_nsamples,
                                               pad_zeros=True)
    sig_chunks_num = sig_chunks.shape[0]
    sig_chunks_tstep = olap_nsamples / samplerate
    sig_chunks_time = numpy.arange(sig_chunks_num) * sig_chunks_tstep

    if reuse_data:
        print(
            "Trying to reuse previously-calculated data for file {0} in dir {1} ..."
            .format(wavfile, config['wrk_path']))
        wrld_res = utils_world.try_search_previous_world_results(
            wavfile, config['wrk_path'])
    else:
        wrld_res = utils_world.run_world_by_reaper(wavfile, config['wrk_path'],
                                                   config['reaper_path'],
                                                   config['world_path'])

    # Fail with an actionable message instead of an opaque one when any of
    # the expected result files is missing.
    if (wrld_res[0] is None or wrld_res[1] is None or wrld_res[2] is None):
        raise Exception(
            'WORLD analysis results are incomplete for file {0}: {1}'.format(
                wavfile, wrld_res))

    sig_f0 = numpy.fromfile(wrld_res[0]).reshape((-1, 1))
    sig_sp = numpy.fromfile(wrld_res[1]).reshape((env_size, -1))
    sig_f0_time = numpy.arange(sig_f0.shape[0]) * config['f0_time_step']
    sig_f0_no_sil = utils_td.remove_silence(sig_f0, 0.0)
    f0_std_no_sil = numpy.std(sig_f0_no_sil)  # not used below

    ### =================== PITCH EXTREMUMS
    f0_extr = utils_pitch.get_f0_extreme_areas(
        sig_f0, config['f0_extr_thr'],
        config['f0_extr_len'] / config['f0_time_step'])
    # Keep only the positive part of each of the two returned curves.
    f0_low = f0_extr[0] * (f0_extr[0] > 0).astype('int')
    f0_high = f0_extr[1] * (f0_extr[1] > 0).astype('int')
    f0_extr = utils_td.perform_mvn_norm((f0_low + f0_high), skip_zeros=True)

    ### =================== SPECTRAL CHANGE
    # Convert the configured band limits from Hz to FFT bin indices.
    freq_step = samplerate / fft_size
    band_idx = [
        int(numpy.round(config['spec_change_band_st'] / freq_step)),
        int(numpy.round(config['spec_change_band_end'] / freq_step))
    ]
    (sc_dfb,
     sc_time) = utils_spec.estimate_sc_from_envelopes(sig_sp.T,
                                                      samplerate,
                                                      0.005 * samplerate,
                                                      band=band_idx)
    # Keep the spectral-change value only where its magnitude is below the
    # threshold; everything else becomes zero.
    sc_res = (numpy.abs(sc_dfb) <
              config['spec_change_threshold'] * std_no_sil).astype('float')
    sc_res = sc_dfb * sc_res

    ### =================== PEAK-to_PEAK
    p2p = utils_td.get_peak_to_peak_from_chunks(sig_chunks)
    # Keep the peak-to-peak value only where it exceeds the threshold.
    p2p_det = (p2p > config['peak_to_peak_thr_std'] *
               std_no_sil).astype('float') * p2p

    ### =================== VOICED MASK
    voiced = (sig_f0.squeeze() > 0.0).astype('int')

    ### =================== FINALIZING RESULTS
    # Bring every detection curve onto the per-sample time grid.
    DETECT_SC = numpy.interp(signal_time.squeeze(), sc_time.squeeze(),
                             sc_res.squeeze())
    DETECT_VO = numpy.interp(signal_time.squeeze(), sig_f0_time.squeeze(),
                             voiced.squeeze())
    DETECT_PP = numpy.interp(signal_time.squeeze(), sig_chunks_time.squeeze(),
                             p2p_det.squeeze())
    DETECT_EX = numpy.interp(signal_time.squeeze(), sig_f0_time.squeeze(),
                             f0_extr.squeeze())

    RESULT_MASK = (DETECT_SC > 0) * (DETECT_VO > 0) * (DETECT_PP >
                                                       0) * (DETECT_EX > 0)
    RESULT_MASK = update_detection_results(RESULT_MASK, samplerate,
                                           config['detect_hysteresis'],
                                           config['detect_merge_threshold'],
                                           config['detect_min_len'],
                                           config['detect_max_len'])

    # The scan region length is forced to be odd.
    scan_seg_len = int(config['scan_region_len'] * samplerate)
    if scan_seg_len % 2 == 0:
        scan_seg_len += 1
    scan_segs = position_scan_regions(signal.squeeze(), RESULT_MASK,
                                      scan_seg_len,
                                      config['detect_scan_min_olap'])

    # One more time make sure unvoiced segments are not detected.
    RESULT_MASK = RESULT_MASK * (DETECT_VO > 0)

    SC_DFB = numpy.interp(signal_time.squeeze(), sc_time.squeeze(),
                          sc_dfb.squeeze())
    P2P = numpy.interp(signal_time.squeeze(), sig_chunks_time.squeeze(),
                       p2p.squeeze())

    dbg_stuff = {
        'spec_change_detect': DETECT_SC,
        'voiced_detect': DETECT_VO,
        'peak2peak_detect': DETECT_PP,
        'f0-extreme_detect': DETECT_EX,
        'threshold_base': std_no_sil,
        'spec_change': SC_DFB,
        'peak2peak': P2P
    }

    return (RESULT_MASK, signal_time, scan_segs, dbg_stuff)
コード例 #8
0
def run_main():
    """Compute a per-sample crest/flatness mask for a wav-file and save it.

    Arguments come from ``parse_input_args()``: ``i`` (input wav-file),
    ``o`` (output file), ``crest_size`` and ``sflat_size`` (frame sizes,
    converted to samples as ``samplerate * size / 1000``), ``crest_thr``,
    ``sflat_thr_down`` and ``sflat_thr_up`` (thresholds).

    Crest values and spectral flatness are computed per frame, linearly
    interpolated onto the per-sample time grid, and combined into a mask
    that is true where the crest value exceeds ``crest_thr`` and the
    flatness lies within ``[sflat_thr_down, sflat_thr_up]``.  The mask is
    plotted and written to the output file as float32.

    Raises:
        Exception: if the input wav-file does not exist.
    """
    ARGS = parse_input_args()

    wavname = ARGS.i
    outname = ARGS.o

    if not os.path.exists(wavname):
        raise Exception(
            "Specified wavfile {0} does not seem to exist!".format(wavname))

    print("Will process file : {0}".format(wavname))
    print("Will write result to : {0}".format(outname))

    (samplerate, signal) = wav.read(wavname)
    sampleperiod = 1.0 / samplerate
    signal = signal.reshape((-1, 1))

    crest_frame_size = int(samplerate * float(ARGS.crest_size) / 1000.0)

    sflat_frame_size = int(samplerate * float(ARGS.sflat_size) / 1000.0)
    # FFT size is the next power of two that is >= the frame size.
    sflat_fft_size = int(2**numpy.ceil(numpy.log2(sflat_frame_size)))

    signal_for_crest = utils_sig.pad_to_multiple_of(signal, crest_frame_size,
                                                    0.0)
    crestchunks = utils_sig.cut_sig_into_chunks(signal_for_crest.T,
                                                crest_frame_size)
    crestfactor_vals = utils_td.get_crest_from_chunks(crestchunks)

    # Only the spectrogram itself is needed; the frequency and time grids
    # returned alongside it are discarded.
    _, _, sgram = sig.spectrogram(signal.squeeze(),
                                  fs=samplerate,
                                  window=sig.get_window('boxcar',
                                                        sflat_frame_size),
                                  nperseg=sflat_frame_size,
                                  noverlap=0,
                                  nfft=sflat_fft_size,
                                  scaling='spectrum',
                                  mode='magnitude')

    sgram = sgram.T

    flatness = utils_sp.calc_spec_gram_flatness(sgram)

    crestperiod = crest_frame_size * sampleperiod
    sflatperiod = sflat_frame_size * sampleperiod

    # Build every time axis from an integer index.  A float-step
    # numpy.arange(0, n * period, period) can come out one element longer
    # than n because of rounding, and numpy.interp raises when the x and y
    # arrays differ in length.
    SIG_X = numpy.arange(len(signal)) * sampleperiod
    CREST_X = numpy.arange(len(crestfactor_vals)) * crestperiod
    SFLAT_X = numpy.arange(len(flatness)) * sflatperiod

    CREST_Y = numpy.interp(SIG_X, CREST_X, crestfactor_vals.squeeze())
    SFLAT_Y = numpy.interp(SIG_X, SFLAT_X, flatness.squeeze())

    RESULT_MASK = (CREST_Y > ARGS.crest_thr) * (
        SFLAT_Y >= ARGS.sflat_thr_down) * (SFLAT_Y <= ARGS.sflat_thr_up)
    # DBG
    utils_plot.simple_plot(RESULT_MASK, SIG_X)

    RESULT_MASK = RESULT_MASK.astype('float32')
    RESULT_MASK.tofile(outname)