Example no. 1
def seek_audio(src_audio_path, seek_time):
    new_file_path = src_audio_path + '.seek_{}_seconds.wav'.format(seek_time)
    audioclip = AudioFileClip(src_audio_path)
    sound_array = audioclip.to_soundarray()
    sound_array = np.array(sound_array)
    sample_rate = audioclip.fps  # typically 44100 samples per second
    duration = audioclip.duration

    time = abs(seek_time)
    empty_sound_count = int(time * sample_rate)
    empty_sounds = np.zeros((empty_sound_count, audioclip.nchannels))

    # If seek_time > 0, put the empty sound at the back after shifting sound to front
    # If seek time < 0, put the empty sound at the front after shifting sound to back (sound at back is lost)
    if seek_time > 0:
        sound_array = sound_array[empty_sound_count:]
        sound_array = np.append(sound_array, empty_sounds)
    else:
        sound_array = sound_array[:len(sound_array) - empty_sound_count]
        sound_array = np.append(empty_sounds, sound_array)

    sound_array = np.reshape(
        sound_array,
        (len(sound_array) // audioclip.nchannels, audioclip.nchannels))

    scaled = np.int16(sound_array / np.max(np.abs(sound_array)) * 32767)
    os.remove(src_audio_path)
    wav_write(new_file_path, sample_rate, scaled)

    return new_file_path
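A minimal usage sketch for seek_audio (the file name and offset are hypothetical; note that the function deletes the source file after writing the shifted copy):

shifted_path = seek_audio('speech.wav', 2)  # drop the first 2 s, pad the end with silence
# seek_audio('speech.wav', -2) would instead pad the front and drop the last 2 s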
Example no. 2
def jcamp2wav(fh, wavenme=None, rate=44100, secs=5):
    'convert a file handle to a JCAMP .dx file into a .wav file'

    def sound_func(amp, hz, t):  # sound generation
        return amp * sin(hz * t)

    def mean(l):
        return sum(l) / len(l)

    dct = jcamp.jcamp_read(fh)

    x, y = dct['x'], dct['y']
    dif = (y.max() - y.min())

    pk = peakdetect(y, x, lookahead=1, delta=dif / 10)

    max_peaks = pk[0]  # [ [x0,y0] , ...., [xn,yn] ]

    waves = [[(_y - y.min()) / dif,
              MusicFreq.freq2octave(_x, 0)]
             for _x, _y in max_peaks]  # amp(0..1), freq oct '0'
    waves.sort(reverse=True)  # get <= 10 most powerful
    waves = waves[:10]
    pi2 = pi * 2  # -> evaluate waves average for each sample
    data = np.asarray(
        [mean([sound_func(amp, hz, t) for amp, hz in waves])
         for t in np.arange(0, secs * pi2, pi2 / rate)],
        dtype=np.float32)

    if not wavenme:  # None is falsy, so one check suffices
        wavenme = fh.name.replace('.dx', '.wav')

    wav_write(wavenme, rate, data)
Example no. 3
def star_sound(fnme, wavenme=None, rate=44100, secs=5):  # from data[1]
    def sound_func(amp, hz, t):  # sound generation
        return amp * sin(hz * t) * sin(MusicFreq.freq2octave(hz, -7) * t)

    def mean(l):
        return sum(l) / len(l)

    irms, lrms, header, data = read_dat(fnme)

    y = data.T[1]
    x = data.T[0]
    dif = (y.max() - y.min())

    pk = peakdetect(y, x, lookahead=1, delta=dif / 10)
    max_peaks = pk[0]  # [ [x0,y0] , ...., [xn,yn] ]

    waves = [[(_y - y.min()) / dif, MusicFreq.freq2octave(_x, 0)] for _x, _y in max_peaks]  # amp(0..1), freq oct '0'
    waves.sort(reverse=True)  # get <= 10 most powerful
    waves = waves[:10]
    pi2 = pi * 2  # -> evaluate waves average for each sample
    datawav = np.asarray(
        [mean([sound_func(amp, hz, t) for amp, hz in waves]) for t in np.arange(0, secs * pi2, pi2 / rate)],
        dtype=np.float32)

    if not wavenme:
        wavenme = fnme.replace('.dat', '.wav')

    wav_write(wavenme, rate, datawav)
Example no. 4
def downsample(filename, outrate=8000, write_wav=False):
    (rate, sig) = wav.read(filename)
    # positional orig_sr/target_sr arguments follow the pre-0.10 librosa API
    down_sig = librosa.core.resample(sig, rate, outrate, scale=True)
    if not write_wav:
        return down_sig, outrate
    wav_write('{}_down_{}.wav'.format(filename, outrate), outrate, down_sig)
Example no. 5
def downsample(filename, outrate=8000, write_wav=False):
    y, sr = librosa.load(filename, sr=22050)
    down_sig = librosa.core.resample(y, sr, outrate, scale=True)
    if not write_wav:
        return down_sig, outrate
    wav_write('{}_down_{}.wav'.format(filename, outrate), outrate, down_sig)
Example no. 6
def main(path, out):
    img = cv2.imread(path)
    filters = build_filters()
    filtered = process(img, filters).swapaxes(0, 1)
    scaled = numpy.int16(filtered / numpy.max(numpy.abs(filtered)) * 32767)
    rv = numpy.ravel(scaled)
    wav_write(out, 44100, rv)
Example no. 7
 def write(
     self,
     name='temp',
 ):
     wav_write(
         '{}{}'.format(name, '' if name.endswith('.wav') else '.wav'),
         int(self.sample_rate),
         (self.data * (2.**15.)).astype(np.int16),
     )
Example no. 8
def segment_opensmile_extraction(config_p, segment_signal, fs, temp_p):

    temp_seg_path = '/tmp/temp_segment.wav'
    int16_s = np.asarray(segment_signal * 32767, dtype=np.int16)
    wav_write(temp_seg_path, fs, int16_s)
    opensmile_feat_vec = opensmile_extract(config_p, temp_seg_path, temp_p)

    os.remove(temp_seg_path)  # cheaper than shelling out to rm
    return opensmile_feat_vec
Example no. 9
def audiowrite(data, path, sample_rate=16000, normalize=False, threaded=True):
    """ Write the audio data ``data`` to the wav file ``path``

    The file can be written in threaded mode. In that case the write is
    started on a separate thread, so the file may not yet be written when
    this function returns.

    :param data: A numpy array with the audio data
    :param path: The wav file the data should be written to
    :param sample_rate: Sample rate of the audio data
    :param normalize: Normalize the audio first so that the values are within
        the range of [INTMIN, INTMAX], i.e. no clipping occurs
    :param threaded: If true, the write process will be started as a separate
        thread
    :return: The number of clipped samples
    """
    assert isinstance(path, (str, Path, io.BytesIO)), path
    assert data.dtype.kind in ['i', 'f'], (data.shape, data.dtype)

    if isinstance(path, Path):
        path = str(path)

    data = data.copy()
    # int16 bounds; presumably module-level constants in the snippet's source
    int16_max = np.iinfo(np.int16).max
    int16_min = np.iinfo(np.int16).min

    if normalize:
        if not data.dtype.kind == 'f':
            data = data.astype(np.float64)  # np.float was removed in NumPy 1.24
        data /= np.maximum(np.amax(np.abs(data)), 1e-6)

    if data.dtype.kind == 'f':
        data *= int16_max

    sample_to_clip = np.sum(data > int16_max) + np.sum(data < int16_min)
    if sample_to_clip > 0:
        print('Warning, clipping {} sample{}.'.format(
            sample_to_clip, '' if sample_to_clip == 1 else 's'
        ))
    data = np.clip(data, int16_min, int16_max)
    data = data.astype(np.int16)

    if threaded:
        threading.Thread(target=wav_write, args=(path, sample_rate, data)
                         ).start()
    else:
        try:
            wav_write(path, sample_rate, data)
        except Exception:  # _struct.error
            if data.ndim == 2:
                assert data.shape[1] < 20, (
                    f"channels bigger than 20 looks wrong "
                    f"(shape: {data.shape}). "
                    f"Maybe you must call audiowrite(data.T, ...)"
                )
            raise

    return sample_to_clip
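A minimal usage sketch for the audiowrite above (the tone and file name are illustrative):

import numpy as np

t = np.arange(16000) / 16000              # 1 s at 16 kHz
tone = 0.5 * np.sin(2 * np.pi * 440 * t)  # float signal in [-1, 1]

# blocking write; the return value is the number of samples that had to be clipped
n_clipped = audiowrite(tone, 'tone.wav', sample_rate=16000, threaded=False)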
Example no. 10
def save_wav(Qin, filename, fs):
    data = np.array([])
    print('\ngetting data\n')
    while True:
        chunk = Qin.get()  # blocks until the recorder puts a chunk
        if chunk == "EOT":  # end-of-transmission sentinel
            print('Saving output...')
            wav_write(filename, fs, data)
            Qin.task_done()
            return
        else:
            data = np.append(data, chunk)  # append chunk to the buffer
            print(len(data))
            Qin.task_done()
Example no. 11
def sound(u, ts, n_samps=1, name=None):
    fs = 10000
    env = np.exp(-0.03 * ts)  #exponentially decaying envelope
    u_env = u * env
    # `m` was undefined in this snippet; it is presumably the number of rows of u
    samp_inds = np.round(np.linspace(0, u.shape[0], n_samps + 2))
    audio = u_env[samp_inds[1:-1].astype(int), :]
    audio = np.sum(audio, axis=0)
    #plt.figure()
    #plt.plot(ts, audio/max(audio))

    sd.play(audio / max(audio), fs)
    #audio_out = np.asarray(audio/max(audio), dtype=np.int16)
    if name is not None:
        wav_write('outputs/{} audio.wav'.format(name), fs, audio / max(audio))
Example no. 12
 def _handle_listen(self, text):
     duration = int(text) // config.audio.chunk_duration if text.strip() else 0
     if len(self.analyzer.audio) > 0:
         with self.analyzer.lock:
             audio = np.concatenate(tuple(self.analyzer.audio)[-duration:])
             self.analyzer.audio.clear()
         wav_file = 'listen.wav'
         wav_write(wav_file, config.audio.sample_rate, audio)
         opus = check_output(['opusenc', wav_file, '-'])
         stream = BytesIO(opus)
         stream.seek(0)
         self._send('voice', stream)
     else:
         self._reply('No audio recorded yet')
Example no. 13
def get_wav():
    out_file = 'out.wav'
    data = json.loads(request.data.decode('utf-8'))  # the encoding kwarg was removed in Python 3.9

    try:
        text = data['text']
    except KeyError:
        return json.dumps({'error': 'text not set'}), 400, {
            'ContentType': 'application/json'
        }

    spect = np.asanyarray([k.npvalue() for k in encoder.predict(text)])
    signal = vocoder.synthesize(spect)
    wav_write(out_file, 24000, signal)

    return send_file(out_file, mimetype='audio/wav')
Example no. 14
    def infer(filename):
        print('Start infer file: {}'.format(filename))
        #(wav, _) = read_wav(args.in_filepath) # read wav from given file path.
        (wav, _) = librosa.load(filename, sr=16000,
                                mono=True)  # read wav from given file path.
        wav = np.asarray(np.multiply(wav, 32768.0), dtype=np.int16)

        print(max(wav), min(wav), np.mean(wav))
        print(wav.shape)

        input_feat = sess.run(net.infer_feat,
                              feed_dict={
                                  net.s_ph: [wav],
                                  net.s_len_ph: [len(wav)]
                              })  # sample of training set.
        xi_bar_hat = sess.run(net.infer_output,
                              feed_dict={
                                  net.input_ph: input_feat[0],
                                  net.nframes_ph: input_feat[1],
                                  net.training_ph: False
                              })  # output of network.
        xi_hat = xi.xi_hat(xi_bar_hat, args.stats['mu_hat'],
                           args.stats['sigma_hat'])

        #file_name = filename.split('/')[-1].split('.')

        y_MAG = np.multiply(input_feat[0],
                            gain.gfunc(xi_hat, xi_hat + 1, gtype=args.gain))
        y = np.squeeze(
            sess.run(net.y,
                     feed_dict={
                         net.y_MAG_ph: y_MAG,
                         net.x_PHA_ph: input_feat[2],
                         net.nframes_ph: input_feat[1],
                         net.training_ph: False
                     }))  # output of network.
        if np.isnan(y).any():
            raise ValueError('NaN values found in enhanced speech.')
        if np.isinf(y).any():
            raise ValueError('Inf values found in enhanced speech.')

        y = np.asarray(np.multiply(y, 32768.0), dtype=np.int16)
        out_filepath = filename.replace('.' + filename.split('.')[-1],
                                        '_pred.wav')
        wav_write(out_filepath, args.f_s, y)
        print('Infer out file: {} done'.format(out_filepath))
        return out_filepath
Example no. 15
def main():
    rte = RunTimeEncoder()
    rte.load('models')

    rtv = RunTimeVocoder()
    rtv.load('models')

    with open('texts.txt', 'rt', encoding='utf-8') as f:
        lines = f.readlines()

        for i, line in enumerate(lines):
            text = line.strip()

            spect = np.asanyarray([v.npvalue() for v in rte.predict(text)])

            signal = rtv.synthesize(spect)

            wav_write('tests/test_%d.wav' % i, 24000, signal)
Example no. 16
    def __call__(self, wav=None, sr=None):
        assert len(wav.shape) == 1
        _wav = None
        input_dtype = wav.dtype
        try:
            if random.random() < self.prob:
                speed_alpha = 1.0 + self.speed_limit * random.uniform(-1, 1)
                pitch_alpha = self.pitch_limit * random.uniform(-1, 1) * 100 # in cents
                #  https://github.com/carlthome/python-audio-effects/blob/master/pysndfx/dsp.py#L531
                with NamedTemporaryFile(suffix=".wav",
                                        dir=tempfile_dir) as temp_file:
                    temp_filename = temp_file.name
                    # always feed int16 to sox
                    if wav.dtype == np.float32:  # compare to the type; np.float32() is a zero scalar
                        wav_int = float2int(wav)
                        wav_write(temp_filename,
                                  sr,
                                  wav_int)
                    else:
                        wav_write(temp_filename,
                                  sr,
                                  wav)

                    torchaudio.initialize_sox()
                    effects = torchaudio.sox_effects.SoxEffectsChain()
                    effects.append_effect_to_chain('pitch', pitch_alpha)
                    effects.append_effect_to_chain('tempo', [speed_alpha])
                    effects.append_effect_to_chain('rate', sr)
                    effects.set_input_file(temp_filename)
                    _wav, _sr = effects.sox_build_flow_effects()
                    torchaudio.shutdown_sox()
                    _wav = _wav.numpy().squeeze()
                    assert sr == _sr
                    # always float output
                    if _wav.dtype == np.int16:
                        _wav = int2float(_wav)
        except Exception as e:
            print(str(e))

        if _wav is not None:
            return {'wav': _wav,'sr': sr}
        else:
            return {'wav': wav,'sr': sr}
Example no. 17
def audiowrite(data, path, samplerate=16000, normalize=False, threaded=True):
    """ Write the audio data ``data`` to the wav file ``path``

    The file can be written in threaded mode. In that case the write is
    started on a separate thread, so the file may not yet be written when
    this function returns.

    :param data: A numpy array with the audio data
    :param path: The wav file the data should be written to
    :param samplerate: Samplerate of the audio data
    :param normalize: Normalize the audio first so that the values are within
        the range of [INTMIN, INTMAX], i.e. no clipping occurs
    :param threaded: If true, the write process will be started as a separate
        thread
    :return: The number of clipped samples
    """
    data = data.copy()
    int16_max = np.iinfo(np.int16).max
    int16_min = np.iinfo(np.int16).min

    if normalize:
        if not data.dtype.kind == 'f':
            data = data.astype(np.float64)  # np.float was removed in NumPy 1.24
        data /= np.maximum(np.max(np.abs(data)), 1e-6)  # guard all-zero input, as in the variant above

    if data.dtype.kind == 'f':
        data *= int16_max

    sample_to_clip = np.sum(data > int16_max) + np.sum(data < int16_min)
    if sample_to_clip > 0:
        print('Warning, clipping {} samples'.format(sample_to_clip))
    data = np.clip(data, int16_min, int16_max)
    data = data.astype(np.int16)

    if threaded:
        threading.Thread(target=wav_write,
                         args=(path, samplerate, data)).start()
    else:
        wav_write(path, samplerate, data)

    return sample_to_clip
Example no. 18
	def evaluate_on_file(self, eval_file, save_dir, dset_X='X', dset_Y='Y'):
		"""
		Arguments
		eval_file: HDF5 file containing the evaluation dataset
		save_dir: Directory to save the generated audio
		dset_X: Name of training set features inside the HDF5 file
		dset_Y: Name of training set labels inside the HDF5 file
		"""
		save_file = os.path.join(save_dir, os.path.splitext(os.path.basename(eval_file))[0]+'.wav')
		X = hdf5matrix(eval_file, dset_X)
		Y = hdf5matrix(eval_file, dset_Y)
		audio = []
		data_gen = TimeseriesGenerator(X, Y, length=self.receptive_field, batch_size=1)
		steps = len(data_gen)
		for step in range(steps):
			if step==0:
				batch_x, batch_y = data_gen[step]
				pred = self.model.predict(batch_x, batch_size=1, verbose=1)
				val = np.random.choice(range(256), p=pred[0])  # pred has shape (1, 256)
				audio.append(val)
			else:
				batch_x, batch_y = data_gen[step]
				batch_x[0,-1,:256] = 0
				batch_x[0,-1,int(audio[-1])] = 1
				pred = self.model.predict(batch_x, batch_size=1, verbose=1)
				val = np.random.choice(range(256), p=pred[0])
				audio.append(val)
		def inverse_mu_law(quantized):
			# scale to [-1,1]
			quantized = np.asarray(quantized).astype(np.float32)
			quantized /= 128
			quantized -= 1
			wav_scale = max(np.abs(np.iinfo(np.int16).min), np.iinfo(np.int16).max)
			quantized *= wav_scale
			audio = quantized.astype(np.int16)
			return audio

		gen_audio = inverse_mu_law(audio)
		wav_write(save_file, 16000, gen_audio)
Example no. 19
os.environ['PATH'] = os.getcwd()+"/renode:"+os.environ['PATH']

# %% [markdown]
"""## Record 1 sec of audio after clicking the button"""

# %%
audio, sr = get_audio()

# %% [markdown]
"""## Convert audio to required format"""

# %%
from scipy.io.wavfile import write as wav_write
import pyaudioconvert as pac

wav_write('audio.wav', sr, audio)
#convert to 16bit because Chrome seems to ignore recording settings
pac.convert_wav_to_16bit_mono('audio.wav', 'converted.wav')
sr, audio = wav_read('converted.wav')
audio.tofile('audio_bin')
!soxi converted.wav

# %% [markdown]
"""## Run a micro-speech example with a recorded audio sample in Renode"""

# %%
from pyrenode import *
shutdown_renode()
connect_renode() # this sets up a log file, and clears the simulation (just in case)

tell_renode('mach create')
Example no. 20
 def write_wav(self, name):
     """save waveform to file
     The bits-per-sample will be determined by the data-type (mostly)"""
     wav_write(name, self._fs, self._value)
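For reference, assuming wav_write is scipy.io.wavfile.write (the import used elsewhere in this listing), the array's dtype selects the sample format:

import numpy as np
from scipy.io.wavfile import write as wav_write

fs = 44100
t = np.arange(fs) / fs
sig = np.sin(2 * np.pi * 440 * t)

wav_write('pcm16.wav', fs, (sig * 32767).astype(np.int16))  # 16-bit PCM
wav_write('float32.wav', fs, sig.astype(np.float32))        # 32-bit IEEE float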
Example no. 21
def make_burst(duration):
    pip_samples = math.ceil(fs * duration)
    #wave = (numpy.random.random(pip_samples) * 2) - 1
    wave = numpy.random.normal(size=pip_samples)
    window = signal_ramp(pip_samples, 5)
    wave = wave * window
    return wave


repeats = 10
silence = make_pip(0.25, 0) * 0  # make_pip is defined elsewhere in the source (presumably duration, frequency)
hp_filter = HighPassFilter(3000, fs, 6)
parts = []
for i in range(repeats):
    burst = make_burst(0.5)
    burst = hp_filter.run(burst)
    parts.append(burst)
    parts.append(silence)

sound = numpy.concatenate(parts)

pyplot.plot(sound)
pyplot.show()

wav_write('pip.wav', fs, sound)

# pip = make_pip(0.25, 1000)

#
#sound = numpy.concatenate((pip, silence))
Example no. 22
def handle_microphone(obj):
    audio, sr = get_audio()
    wav_write('audio.wav', sr, audio)
    pac.convert_wav_to_16bit_mono('audio.wav', 'converted.wav')
    sr, audio = wav_read('converted.wav')
    audio.tofile('audio_bin')
Example no. 23
    def __call__(self, wav=None, sr=None):

        assert len(wav.shape) == 1
        _wav = None

        if random.random() < self.prob:
            codec = random.choice(self.sox_codec_list)
            quality = random.choice(self.quality_presets)
            sox_sr = random.choice(self.sox_sr_list)
            # amr-nb and gsm are 8 kHz codecs; forcing 8k suppresses sox warnings
            if codec in ['amr-nb', 'gsm']:
                sox_sr = 8

            with NamedTemporaryFile(suffix="."+codec,
                                    dir=tempfile_dir) as codec_temp_file:
                with NamedTemporaryFile(suffix=".wav",
                                        dir=tempfile_dir) as wav_temp_file:

                    # save wav to disk
                    # transform using a codec
                    # convert back to wav and read

                    codec_temp_filename = codec_temp_file.name
                    wav_temp_filename = wav_temp_file.name

                    if wav.dtype == np.float32:
                        wav_int = float2int(wav)
                        wav_write(wav_temp_filename,
                                  sr,
                                  wav_int)
                    else:
                        wav_write(wav_temp_filename,
                                  sr,
                                  wav)

                    sox_params = 'sox {} -r {}k -c 1 -C {} -t {} {}'.format(
                        wav_temp_filename,
                        sox_sr,
                        quality,
                        codec,
                        codec_temp_filename
                    )
                    sox_reverse_params = 'sox {} -r {}k -c 1 -e signed-integer -t {} {}'.format(
                        codec_temp_filename,
                        str(int(sr//1000)),
                        'wav',
                        wav_temp_filename
                    )

                    # print(sox_params)
                    # print(sox_reverse_params)

                    _ = subprocess.Popen(sox_params,
                                         shell=True,
                                         stdout=subprocess.PIPE).stdout.read()

                    _ = subprocess.Popen(sox_reverse_params,
                                         shell=True,
                                         stdout=subprocess.PIPE).stdout.read()

                    _sr, _wav = wav_read(wav_temp_file)
                    assert _sr == sr
                    if _wav.dtype == np.int16:
                        _wav = int2float(_wav)

        if _wav is not None:
            return {'wav': _wav, 'sr': sr}
        else:
            return {'wav': wav, 'sr': sr}
Example no. 24
def create_words_template_fromwav(wav, wordlist=[], Fs=16000):
    frm_time_width = 0.02  # 20ms
    analy_sta = "noinit"
    ana_frm_cnt = 0

    wav_data = np.array(wav)
    ana_frm_width = int(frm_time_width * Fs)
    ana_frm_step = ana_frm_width // 2

    noise_tmpt_frms = 6

    noise_fft_tmpt = np.zeros((noise_tmpt_frms, ana_frm_step))
    noise_freq_tmpt = np.zeros((noise_tmpt_frms, ana_frm_step))

    # back up the old wav and parameter files
    wavfile = "./ResFiles/Wav/Word_voice_tmpt.wav"
    time_ymd_hms = time.strftime("_%Y%m%d_%H%M%S", time.localtime())
    new_wavfile = wavfile[0:-4] + time_ymd_hms + ".wav"
    para_file = wavfile[0:-4] + 'WordPara' + ".npz"
    new_para_file = para_file[0:-4] + time_ymd_hms + ".npz"

    shutil.copyfile(wavfile, new_wavfile)
    shutil.copyfile(para_file, new_para_file)

    write_data = np.array(wav_data, dtype='int16')
    wav_write(wavfile, Fs, write_data)

    wav_data = sigana.normalization_pn1(wav_data)

    # print(len(wav_data))
    # print(wav_data[0:200])

    while analy_sta == "noinit":
        ana_poi = ana_frm_cnt * ana_frm_step
        _frm = sigana.get_frame(wav_data, ana_poi, ana_frm_width)
        _nfft, _freq_y = sigana.calc_fft(_frm, Fs)
        noise_fft_tmpt[ana_frm_cnt] = _nfft
        noise_freq_tmpt[ana_frm_cnt] = _freq_y
        ana_frm_cnt += 1

        if ana_frm_cnt >= noise_tmpt_frms:
            analy_sta = "noise"
            #             add_voi_ana_log(ana_poi, analy_sta)
            break

        else:
            pass

    while True:
        ana_poi = ana_frm_cnt * ana_frm_step
        ana_frm_cnt += 1
        _end_poi = ana_poi + ana_frm_width
        if _end_poi > len(wav_data):
            analy_sta = "finished"
            break

        _frm = sigana.get_frame(wav_data, ana_poi, ana_frm_width)

        _sum, _judge = judge_voice_energy(_frm)

        if _judge == "voiless":
            # short-time energy classifies the frame as unvoiced
            if analy_sta != "voiless":
                if analy_sta == "noise":
                    create_new_voice_fileseg(ana_poi)

                analy_sta = "voiless"
                add_voi_ana_log(ana_poi, analy_sta)
                # add_sum_log(_sum)
                add_sum_log(0.5)
        elif _judge == "voied":
            # short-time energy classifies the frame as voiced
            if analy_sta != "voied":
                if analy_sta == "noise":
                    create_new_voice_fileseg(ana_poi)

                analy_sta = "voied"
                add_voi_ana_log(ana_poi, analy_sta)
                # add_sum_log(_sum)
                add_sum_log(1)
        else:
            # use the first-order difference to separate unvoiced speech from noise
            _sum, _judge = judge_voiless_delta(_frm)

            if _judge == "noise":
                if analy_sta != "noise":
                    # if ana_poi - g_word_arr[g_word_idx].sta_poi >= 400:
                    finish_of_voice_fileseg(ana_poi)
                    analy_sta = "noise"
                    add_voi_ana_log(ana_poi, analy_sta)
                    # add_sum_log(_sum)
                    add_sum_log(0.2)
            elif _judge == "voiless":
                if analy_sta != "voiless":
                    if analy_sta == "noise":
                        create_new_voice_fileseg(ana_poi)

                    analy_sta = "voiless"
                    add_voi_ana_log(ana_poi, analy_sta)
                    # add_sum_log(_sum)
                    add_sum_log(0.5)
            else:
                pass

        if analy_sta == "voiless":
            add_frm_stat_log(ana_poi, 0.25)
        elif analy_sta == "voied":
            add_frm_stat_log(ana_poi, 0.5)
        elif analy_sta == "noise":
            add_frm_stat_log(ana_poi, -0.15)

    print("len(g_word_arr): ", len(g_word_arr))
    print("g_word_idx: ", g_word_idx)

    MIN_WORD_SPACE = 2400
    global g_check_word_idx
    global g_check_word_arr

    for i in range(len(g_word_arr)):
        if i == 0:
            _new_vfseg = VoiceFileSegInfo(wavfile)
            _new_vfseg.sta_poi = g_word_arr[i].sta_poi
            _new_vfseg.end_poi = g_word_arr[i].end_poi
            g_check_word_arr.append(_new_vfseg)
            # g_check_word_idx = g_check_word_idx + 1
        # if (g_word_arr[i].end_poi - g_word_arr[i].sta_poi) >= 640:
        if i >= 1:
            if (g_word_arr[i].sta_poi -
                    g_check_word_arr[g_check_word_idx].end_poi
                ) >= MIN_WORD_SPACE:
                g_check_word_arr[g_check_word_idx].end_poi = g_word_arr[
                    i - 1].end_poi
                # create a new vfseg for new word
                _new_vfseg2 = VoiceFileSegInfo(wavfile)
                _new_vfseg2.sta_poi = g_word_arr[i].sta_poi
                _new_vfseg2.end_poi = g_word_arr[i].end_poi
                g_check_word_arr.append(_new_vfseg2)
                g_check_word_idx = g_check_word_idx + 1

            else:
                g_check_word_arr[g_check_word_idx].end_poi = g_word_arr[
                    i].end_poi

        if i == len(g_word_arr) - 1:
            g_check_word_arr[g_check_word_idx].end_poi = g_word_arr[i].end_poi

    print("len(g_word_arr): ", len(g_word_arr))
    print("len(g_check_word_arr): ", len(g_check_word_arr))

    save_pars = []

    _word_n = len(g_check_word_arr)
    if _word_n > len(wordlist):
        _word_n = len(wordlist)

    for i in range(_word_n):

        _disp_data = wav_data[
            g_check_word_arr[i].sta_poi:g_check_word_arr[i].end_poi]

        if len(_disp_data) >= 2400:
            move_time = 0.01
            num_frms = int(len(_disp_data) / (16000 * move_time))
            _fft, _freq = sigana.cal_frames_fft_log(_disp_data * 10000,
                                                    16000,
                                                    t_start=0,
                                                    t_window=0.02,
                                                    frame_n=num_frms,
                                                    move_step=move_time)
            _fft = sigana.normalization_array_pn1(_fft)

            save_pars.append(_fft)

            voice_para_save.save_word_voice_para(
                g_wordpara_arr[i],
                _fft,
                word=wordlist[i],
                file=wavfile,
                s_idx=g_check_word_arr[i].sta_poi,
                e_idx=g_check_word_arr[i].end_poi,
                time=0.0,
                ext_info="")

    para_file_name = wavfile[0:-4] + 'WordPara' + ".npz"
    np.savez(para_file_name, g_wordpara_arr)
Example no. 25
def save_wav(save_path, f_s, wav):
    if wav.dtype == np.float32:  # check the dtype rather than the first element (safe for empty arrays)
        wav = np.asarray(np.multiply(wav, 32768.0), dtype=np.int16)
    wav_write(save_path, f_s, wav)
Example no. 26
import sys
import numpy as np

from scipy.io.wavfile import write as wav_write

audio = np.empty((0,1), dtype = np.uint8)
with open(sys.argv[1]) as f:
    for i, line in enumerate(f):
        if i == 0 or i == 1:
            print(i)
            continue
        bits = list(line.strip())  # strip() also copes with a missing trailing newline
        packed = np.packbits(np.array(bits, dtype=np.uint8))
        audio = np.vstack((audio, packed.astype(np.uint8)))

audio = np.stack(audio, axis=1)[0]
wav_write("output.wav", data=audio, rate=int(sys.argv[2]))
Example no. 27
 def write_wav(self, filename):
     wav_write(filename, round(float(self.samplerate)), self.data)
Example no. 28
def make_spike_wav(fname_spikes,
                   fname_wav,
                   nrn_idx=None,
                   time_mode="real",
                   fname_spike_kernel=None,
                   wav_dtype=np.int32,
                   plot=False):
    """Create a wav audio file from the spike data

    Parameters
    ----------
    fname_spikes: string
        input spike data text filename
        first column is time
        subsequent columns are each neuron's spikes as would be recorded in the nengo simulator
    fname_wav: string
        filename of output wav data
    nrn_idx: list-like or none
        indices of neurons to use to generate wav file
        if None, uses all neurons, one per wav file channel
    time_mode: "real" or "sim"
        Whether to use the spike's real or simulation time
    fname_spike_kernel: string or None
        if string, wav file name of kernel to convolve spikes with to generate different sounds
    wav_dtype: numpy dtype
        data type to be used in the wav file
    plot: bool
        if True, plot the spike waveforms (and the kernel, if one was given)
    """
    assert isinstance(fname_spikes, str)
    assert isinstance(fname_wav, str)
    assert time_mode in ["real", "sim"]

    sim_time, measured_time, spk_data_raw = _load_spike_data(fname_spikes)
    if nrn_idx is not None:
        spk_data_raw = spk_data_raw[:, nrn_idx]
    n_samples, n_neurons = spk_data_raw.shape
    if time_mode == "real":
        time = measured_time
    elif time_mode == "sim":
        time = sim_time
    n_resamples = int(np.ceil(time[-1] * AUDIO_SAMPLE_RATE))
    spk_data = np.zeros((n_resamples, n_neurons))

    for nrn_idx in range(n_neurons):
        spk_idx = np.nonzero(spk_data_raw[:, nrn_idx])[0]
        spk_times = time[spk_idx]
        spk_resampled_idx = np.round(spk_times * AUDIO_SAMPLE_RATE).astype(int)
        spk_data[spk_resampled_idx,
                 nrn_idx] = AUDIO_SAMPLE_RATE  # impulse as 1/dt

    if fname_spike_kernel is not None:
        assert isinstance(fname_spike_kernel, str)
        spike_kernel_sample_rate, spk_kernel_data = wav_read(
            fname_spike_kernel)
        assert spike_kernel_sample_rate == AUDIO_SAMPLE_RATE, (
            "spike_kernel wav file sample rate must match AUDIO_SAMPLE_RATE")
        assert spk_kernel_data.ndim == 1, (
            "spike_kernel wav data must have a single channel")
        shift_idx = np.argmax(
            np.abs(spk_kernel_data))  # find peak of kernel for later alignment
        spk_data_preconv = spk_data.copy()
        spk_data_postconv = np.zeros(
            (spk_data.shape[0] + len(spk_kernel_data) - 1, n_neurons))

        for nrn_idx in range(n_neurons):
            spk_data_postconv[:, nrn_idx] = convolve(spk_data[:, nrn_idx],
                                                     spk_kernel_data,
                                                     mode="full")
        spk_data = spk_data_postconv
        spk_data = spk_data[shift_idx:]  # align with original waveform
        spk_data = spk_data[:n_resamples]  # clip to original length
    np.clip(spk_data,
            np.iinfo(wav_dtype).min,
            np.iinfo(wav_dtype).max, spk_data)
    spk_data = spk_data.astype(wav_dtype)
    wav_write(fname_wav, AUDIO_SAMPLE_RATE, spk_data)

    if plot:
        spk_data_fig = plt.figure()
        time_resampled = np.arange(n_resamples) * AUDIO_SAMPLE_DT
        if fname_spike_kernel is not None:
            ax = spk_data_fig.add_subplot(211)
            ax.plot(time_resampled, spk_data_preconv)
            ax.set_ylabel("raw spike waveform")
            ax.set_title("spike waveforms")
            ax = spk_data_fig.add_subplot(212, sharex=ax)
            ax.plot(time_resampled, spk_data)
            ax.set_ylabel("filtered spike waveform")
            ax.set_xlabel("time (s)")

            time_kernel = np.arange(len(spk_kernel_data)) * AUDIO_SAMPLE_DT
            spk_kernel_fig = plt.figure()
            ax = spk_kernel_fig.add_subplot(111)
            ax.plot(time_kernel, spk_kernel_data)
            ax.set_title("spike kernel waveform")
            ax.set_xlabel("time (s)")
        else:
            ax = spk_data_fig.add_subplot(111)
            ax.plot(spk_data)
        plt.show()
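A minimal usage sketch for make_spike_wav (the file names are hypothetical; _load_spike_data, AUDIO_SAMPLE_RATE and the plotting helpers come from the surrounding module):

import numpy as np

make_spike_wav('spikes.txt', 'spikes.wav',
               nrn_idx=[0, 1, 2, 3],             # render only the first four neurons
               time_mode='real',
               fname_spike_kernel='kernel.wav',  # mono wav at AUDIO_SAMPLE_RATE
               wav_dtype=np.int32,
               plot=False)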