import os
from scipy.io.wavfile import read as read_wav


def main(args):

    max_length = 0
    min_length = 0
    total_length = 0
    count = 0

    with open(os.path.join(args.data_dir, args.split + '.tsv'), 'r') as f:
        next(f)
        for line in f:

            line_split = line.split('\t')
            audio_fn = line_split[1]

            filepath = os.path.join(args.data_dir, 'clips', audio_fn[:-4] + '.wav')

            sr, data = read_wav(filepath)

            length = len(data) / sr

            if length > max_length:
                max_length = length
            if length < min_length or min_length == 0:
                min_length = length

            total_length += length
            count += 1

    avg_length = total_length / count

    print('Total: {:.4f} s'.format(total_length))
    print('Min length: {:.4f} s'.format(min_length))
    print('Max length: {:.4f} s'.format(max_length))
    print('Average length: {:.4f} s'.format(avg_length))
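
A minimal argument-parsing stub that could drive main() above; the flag names are inferred from the attribute accesses in the function and are assumptions, not part of the original snippet:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', required=True)  # folder containing <split>.tsv and a clips/ subfolder
    parser.add_argument('--split', default='train')   # name of the .tsv split to scan, e.g. train/dev/test
    main(parser.parse_args())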
Example #2
File: separate.py Project: dzluke/smc2021
def separate(ckpt, mtgph, input_path, output_path):
    model = inference.SeparationModel(ckpt, mtgph)
    if not os.path.exists(input_path):
        raise Exception("Wrong input file {}".format(input_path))
    file_list = [input_path]
    from scipy.io.wavfile import read as read_wav
    from scipy.io.wavfile import write as write_wav
    sr, f = read_wav(file_list[0])

    with model.graph.as_default():
        dataset = data_io.wavs_to_dataset(file_list,
                                          batch_size=1,
                                          num_samples=len(f),
                                          repeat=False)
        # Strip batch and mic dimensions.
        dataset['receiver_audio'] = dataset['receiver_audio'][0, 0]
        dataset['source_images'] = dataset['source_images'][0, :, 0]

    waveforms = model.sess.run(dataset)
    separated_waveforms = model.separate(waveforms['receiver_audio'])
    # print(separated_waveforms)
    # print(separated_waveforms.shape)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    for i in range(separated_waveforms.shape[0]):
        write_wav(output_path + '/sub_target{}.wav'.format(i), sr,
                  separated_waveforms[i, :])
Example #3
def Extract_Pitch(self, player_pos_x, player_pos_y, object_pos_x,
                  object_pos_y):
    import librosa
    from scipy.io.wavfile import read as read_wav
    import scipy.spatial.distance
    import os
    import numpy as np
    sampling_rate, data = read_wav("Test.wav")
    y, sr = librosa.load('Test.wav', sr=sampling_rate)
    pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr, fmin=75, fmax=1600)

    player = [player_pos_x, player_pos_y]
    target = [object_pos_x, object_pos_y]

    distance = scipy.spatial.distance.euclidean(player, target)

    if (distance == 0):
        factor = 1000
    else:
        factor = 1000 / distance

    pitch_values = []

    for i in range(len(pitches[0])):
        index = magnitudes[:, i].argmax()
        pitch = pitches[index, i] + factor
        pitch_values.append(pitch)
    return np.array(pitch_values)
Example #4
File: run.py Project: hyunwooj/unm-cs429
def load_validation_set():
    """
    Output
        a tuple of features: (fft features, mfcc features, mean-std features)
    Description
        extracts three types of features from validation set.
    """
    ffts = dict()
    mfccs = dict()
    mean_stds = dict()

    for i in validation_ids:
        path = './validation/validation.{i}.wav'.format(i=i)

        _, X = read_wav(path)

        # FFT
        fft = np.array(abs(sp.fft(X)[:1000]))
        ffts.update({i: fft})

        # MFCC
        ceps, mspec, spec = mfcc(X)
        num_ceps = len(ceps)
        x = np.mean(ceps[int(num_ceps*1/10):int(num_ceps*9/10)], axis=0)
        mfccs.update({i: x})


        # Mean-Std
        [Fs, x] = audioBasicIO.readAudioFile(path)
        F = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050*Fs, 0.025*Fs)
        mean_std = []
        for f in F:
            mean_std.extend([f.mean(), f.std()])
        mean_stds.update({i: np.array(mean_std)})
    return (ffts, mfccs, mean_stds)
Example #5
def convert(input_path, operation, output_type="", mp3=False, wav=True):
    timestamp = str(time.time()).split('.')[0]

    sr, y = read_wav(input_path)
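    # 1 / sr is the sample spacing in seconds, used as the differentiation/integration step below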
    if operation == 'derivative':
        res = derivative(y, 1 / sr)
    elif operation == 'integral':
        res = integral(y, 1 / sr)
    else:
        print('ERROR: The operation ' + operation + ' is unknown!')
        return

    file_name = str(os.path.basename(input_path).split('.')[0])
    file_name_complete = timestamp + '_' + file_name + '_converted_to_' + output_type
    output_path = 'data/outputs/' + file_name_complete
    os.makedirs(output_path)
    output_path = output_path + '/'

    if wav:
        output.write_wav(output_path + file_name_complete + '.wav',
                         res,
                         sr=sr,
                         norm=True)

    if mp3:
        save_mp3(output_path + file_name_complete, res, sr)

    print('File converted to: ' + output_path)
    def get_item(self, key):
        try:
            return self.data_storage[key]
        except KeyError:
            # Lazy loading: read and cache the wav on first access
            sr, signal = read_wav(key)
            self.data_storage[key] = (sr, signal)
            return sr, signal
Example #7
    def _openWave(self,fileName):
        self.fps,self.wav_buf = read_wav(fileName)
        channels = self.wav_buf.ndim
        if channels > 1:  # stereo -> mono: keep only the first channel
            self.wav_buf = self.wav_buf.flatten()[::channels]

        self.blockLen = int(self.fps/VIDEO_FPS)
        self.blocksReaded = 0
        self.blocksToRead= int(len(self.wav_buf)/self.blockLen)
    def create_noise_samples(self):
        self.noises = []
        # Extract 1 s samples (length 16000) from the background noise index
        for f in self.silence_index:
            # Load file
            sample_rate, signal = read_wav(f)
            # Take as many full 1 s slices as possible; stopping at
            # len(signal) - sample_rate avoids a final, shorter slice
            for i in range(0, len(signal) - sample_rate, sample_rate):
                self.noises.append(signal[i:i + sample_rate])
Example #9
def wav2guitar_distortion(inFile,outFile=None, deepValue=0.5):
    """ преобразовать звук в файле таким образом, как будто он
    прошел через дисторшн примочку"""
    if outFile is None:
        outFile = changeFileExt(inFile,'_distortion.wav')

    fps,data_in = read_wav(inFile)
    Amax = int(deepValue*np.max(data_in))
    Amin = int(deepValue*np.min(data_in))
    data_out = np.clip(data_in,Amin,Amax) # clip sound amplitude
    write_wav(outFile,fps,data_out)
Example #10
File: io.py Project: mattdeak/fma-ml
    def read(self, filepath):
        """Reads an mp3 file into a numpy array.
        
        Arguments:
            filepath {str} -- The path to the mp3 file
        """
        # Convert from mp3 to wav
        sound = AudioSegment.from_mp3(filepath)
        sound.export(self.EXPORT_PATH, "wav")

        _, data = read_wav(self.EXPORT_PATH)
        return data
Example #11
def get_data_from_flac(data):
    flac_temp = tempfile.NamedTemporaryFile(suffix=".flac")

    # writes data from server to local tempfile
    flac = open(flac_temp.name, 'wb')
    flac.write(data)
    flac.close()

    wav_temp = tempfile.NamedTemporaryFile(suffix=".wav")
    sound = AudioSegment.from_file(flac_temp.name, "flac")
    sound.export(wav_temp.name, format="wav")
    bee_rate, bee_data = read_wav(wav_temp.name)
    flac_temp.close()
    wav_temp.close()
    return bee_rate, bee_data
Example #12
def get_white_noise(noise_filename):

    sample_rate, white_noise_ndarray = read_wav(noise_filename)

    white_noise = list(white_noise_ndarray)

    white_noise_lower_limit = -2**15
    white_noise_upper_limit = 2**16
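    # Assumes 16-bit PCM input: shifting by 2**15 and dividing by 2**16 maps
    # each sample from [-32768, 32767] into [0, 1)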

    white_noise_normalized = [
        (noise_level - white_noise_lower_limit) / white_noise_upper_limit
        for noise_level in white_noise
    ]

    return white_noise_normalized
Example #13
def huki():
    just_path = 'C:/Users/chyre/Desktop/Studia/V/TM/Projekt2/potrzebne pliki/huk_wav'
    os.chdir(just_path)
    waves = [f for f in listdir() if f.endswith('.wav')]

    lista_hukow = []
    for l in waves:
        lista_hukow.append(l)

        sampling_rate, data = read_wav(str(l))  # enter your filename

        x = signal.resample(data, 16000)
        print(x)

    return lista_hukow
Example #14
def huki():
    just_path = 'C:/Users/chyre/Desktop/Studia/V/TM/Projekt2/potrzebne pliki/znormalizowane_huki'
    os.chdir(just_path)
    waves = [f for f in listdir() if f.endswith('.wav')]

    lista_hukow=[]
    for l in waves:
        lista_hukow.append(l)

        sampling_rate, data = read_wav(str(l))  # enter your filename

        huk = signal.resample(data, 16000)
        wav.write((just_path + '/' + str(l)), 16000, huk)

    return lista_hukow
Example #15
        def _getFile(widget, name):
            filename = widget.text()
            if filename == '':
                raise Exception(('Cannot run convolution: must specify the {}'
                                 ).format(name))
            if not os.path.exists(filename):
                raise Exception(
                    ('Cannot run convolution: path specified {} does not exist'
                     ).format(name))

            rate, data = read_wav(filename)
            data = pcm2float(data, 'float32')

            if len(data.shape) == 1:
                data = data.reshape((len(data), 1))

            return data, rate
    def get_test_data(self, batch_size, offset):
        total_size = len(self.test_set)
        real_batch_size = min(batch_size, total_size - offset)
        batch_data = np.zeros(
            (real_batch_size, self.preprocessor.fingerprint_size))
        filenames = []
        for i in range(offset, offset + real_batch_size):
            sample = self.test_set[i]
            # Load wav
            sr, signal = read_wav(sample['path'])
            filenames.append(sample['filename'])
            # Check signal length
            signal = self.preprocessor.check_audio_length(signal)
            # Featurize
            batch_data[i - offset, :] = (
                self.preprocessor.get_log_mel_spectrograms(signal))
        return batch_data, filenames
Example #17
def create_mfcc_array(output_path='data/mfcc_features.npy',
                      wav_list=WAV_LIST,
                      n_mfcc=N_MFCC,
                      sample_rate=SAMPLE_RATE,
                      duration=DURATION,
                      window_length=WINDOW_LENGTH,
                      step=STEP,
                      n_fft=N_FFT):
    n_frames = int(duration / step) - 1
    mfcc_features = np.zeros((len(wav_list), n_frames, n_mfcc))
    for i, wav_path in enumerate(wav_list):
        rate, signal = read_wav(wav_path)
        mfcc_features[i] = psf.mfcc(signal[:(duration * sample_rate)],
                                    samplerate=sample_rate,
                                    winlen=window_length,
                                    winstep=step,
                                    numcep=n_mfcc,
                                    nfft=n_fft)
    np.save(output_path, mfcc_features)
    return mfcc_features
Example #18
from scipy.io.wavfile import read as read_wav
import os
#os.chdir('path') # change to the file directory
sampling_rate, data = read_wav("4.wav")  # enter your filename
print(sampling_rate)
Example #19
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed May 29 10:02:44 2019

@author: tim
"""

# Going to put clips and information into same format as tutorial
# Could change this at a later date, by changing format that the tutorial accepts
# but for the moment, I'll keep using the instruments.csv file to record the
# the files and their class

# So, loop through sub directories in the tagged_recordings folder
# copy them into the audio_files_cacophony folder (cf wavfiles folder in the tutorial)
# and insert a row in the tags.csv file (cf the instruments folder of the tutorial)

# Test that we can load the file format (a rough sketch of the copy-and-tag loop follows after this snippet)
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy.io.wavfile import read as read_wav
from python_speech_features import mfcc, logfbank
import librosa

sampling_rate, data = read_wav('wavfiles/' + wav_file)
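
A rough sketch of the copy-and-tag loop described in the comments above, assuming a tagged_recordings/ folder whose sub-directory names are the class labels and a tags.csv with fname and label columns (the folder and column names here are assumptions, not taken from the original project):

import os
import shutil
import pandas as pd

SRC_DIR = 'tagged_recordings'        # assumed: one sub-directory per class label
DST_DIR = 'audio_files_cacophony'    # assumed destination folder (cf. wavfiles in the tutorial)
TAGS_CSV = 'tags.csv'                # assumed tag file (cf. instruments.csv in the tutorial)

os.makedirs(DST_DIR, exist_ok=True)
rows = []
for label in sorted(os.listdir(SRC_DIR)):
    sub_dir = os.path.join(SRC_DIR, label)
    if not os.path.isdir(sub_dir):
        continue
    for fname in sorted(os.listdir(sub_dir)):
        if fname.endswith('.wav'):
            # copy the clip across and record its class for tags.csv
            shutil.copy(os.path.join(sub_dir, fname), os.path.join(DST_DIR, fname))
            rows.append({'fname': fname, 'label': label})

pd.DataFrame(rows).to_csv(TAGS_CSV, index=False)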
Example #20
    ~~~~~~~~~~~~~~

    Doing quick pythonic stuff.
"""

from scipy.io.wavfile import read as read_wav
from matplotlib import pyplot as plt, patches as mpl_patches, rcParams as mpl_params
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset

MAX_INT16 = 32768
mpl_params['axes.labelsize'] = 24
mpl_params['xtick.labelsize'] = 20
mpl_params['ytick.labelsize'] = 20

if __name__ == '__main__':
    sampling_rate, signal = read_wav(
        '../../lubos/data/wavs/SADB-2010-NTB/_data8kHz-cut/spk-1/spk-1-1.wav')
    signal = [float(s) / MAX_INT16 for s in signal]  # normalisation
    duration = float(len(signal)) / sampling_rate
    win_len = 0.025
    win_shift = 0.01

    fig, ax = plt.subplots()
    ax.plot([float(s) / sampling_rate for s in range(len(signal))],
            signal,
            alpha=0.3)
    ax.add_patch(
        mpl_patches.Rectangle((1, -0.01), 0.1, 0.02, fill=False, color='red'))
    col = ('magenta', 'red', 'orange', 'maroon')
    for i in range(int(duration / win_shift)):
        ax.add_patch(
            mpl_patches.Rectangle((i * win_shift, -0.48),
    def read_path(self):
        rate, data = read_wav(self.path)
        self.rate = rate
        self.data = data
Example #22
def open_wave(filename):
    return read_wav(filename)
Example #23
    t_1 = 0
    for n, t in zip(subject_trials['Subject{}'.format(subject_i)],
                    range(1,
                          np.shape(events_times)[0] + 1)):

        trial = np.transpose(data[t_1:t, :, :], (0, 2, 1))
        trial_reshape = np.concatenate(np.split(trial, n_splits, axis=1),
                                       axis=0)
        eegs.append(trial_reshape)

        t_1 = t
        audio_path_20000 = os.path.join(FULLAUDIODIR,
                                        r'20000/20000_{}.wav'.format(n))
        audio_path_Journey = os.path.join(FULLAUDIODIR,
                                          r'Journey/Journey_{}.wav'.format(n))
        _, twenty = read_wav(audio_path_20000)
        _, journey = read_wav(audio_path_Journey)
        twenty = rms_normalize(twenty[0:2646000])
        journey = rms_normalize(journey[0:2646000])
        clean_sound = twenty if subject_i <= 17 else journey
        unattended_sound = twenty if subject_i > 17 else journey

        clean_sound_reshape = np.concatenate(np.split(clean_sound[None],
                                                      n_splits,
                                                      axis=1),
                                             axis=0)
        unattended_sound = np.concatenate(np.split(unattended_sound[None],
                                                   n_splits,
                                                   axis=1),
                                          axis=0)
Example #24
        plt.tight_layout()
        plt.show()

    return filters


if __name__ == '__main__':
    ''' Settings '''
    args = parse_arguments()
    input_file = args.input_file  # .wav file to process
    ms_dur = args.microsegment_dur  # microsegment duration [s]
    ms_shift_dur = args.shift_dur  # microsegment shifting duration [s]
    verbose = args.verbose  # print analysis and plot the mfcc?
    show_filters = args.show_filters  # show the mfcc filters?
    ''' Read, normalize and analyze wav signal '''
    f_s, s_raw = read_wav(input_file)  # sampling frequency [Hz], raw signal
    s = normalize(s=s_raw)  # signal mapped to <-1,1>
    s_len = len(s)  # signal length
    s_dur = float(s_len) / f_s  # signal duration [s]
    ms_len = int((ms_dur / s_dur) * s_len)  # microsegment length
    ms_shift = int((ms_shift_dur / s_dur) * s_len)  # microsegment shift
    n_ms = int(
        (s_len - ms_len) / ms_shift)  # number of microsegments (windows)
    bw = f_s / 2  # transmitted band [Hz]
    bw_m = int(hz2mel(bw))  # transmitted band [mel]
    n_filters = 40  # number of filters
    db_m = float(bw_m) / (n_filters + 1)  # delta m (for filter shift in mels)
    filter_len = (ms_len / 2)  # filter and also the final microsegment length
    f_def_step = float(
        bw) / filter_len  # frequency step, where the filter is defined [Hz]
    f_def_step_m = float(
Example #25
        keys = matstruct_contents[0, 0].dtype.fields.keys()

        for t in range(20):
            val = matstruct_contents[0, t]
            if val['repetition'][0][0][0][0] == 0:
                part = val['part'][0][0][0][0]
                attended_track = val['attended_track'][0][0][0][0]
                tracks = [1, 2]
                tracks.remove(attended_track)
                unattended_track = tracks[0]

                attended_sound_name = 'part{}_track{}_dry.wav'.format(
                    part, attended_track)
                unattended_sound_name = 'part{}_track{}_dry.wav'.format(
                    part, unattended_track)
                afs, attended_sound = read_wav(
                    os.path.join(SOUNDDIR, attended_sound_name))
                attended_sound = rms_normalize(
                    np.copy(attended_sound).astype(float))
                ufs, unattended_sound = read_wav(
                    os.path.join(SOUNDDIR, unattended_sound_name))
                unattended_sound = rms_normalize(
                    np.copy(unattended_sound).astype(float))

                eeg = val['RawData'][0][0][0][0][1]

                attended_sound = attended_sound[:int(min_eeg_duration * afs)]
                unattended_sound = unattended_sound[:int(min_eeg_duration *
                                                         ufs)]
                eeg = eeg[:int(min_eeg_duration * 128)]

                eeg_reshape = np.concatenate(np.split(eeg[None],
import wave

import numpy as np
import pylab
from matplotlib.cm import get_cmap
from matplotlib.pyplot import show, plot, title, figure, xlabel, ylabel, specgram, colorbar, stem
from matplotlib.pyplot import subplot, tight_layout
from scipy.fftpack import fft
from scipy.io.wavfile import read as read_wav
from scipy.io.wavfile import write

# numpy, matplotlib and librosa are assumed to be installed already (e.g. via pip)


if __name__ == "__main__":
    sampling_rate, data = read_wav("man_voice.wav")

    fft_out = fft(data)

    fnames = ["man_voice.wav", "szum.wav"]
    wavs = [wave.open(fn) for fn in fnames]
    frames = [w.readframes(w.getnframes()) for w in wavs]
    samples = [np.frombuffer(f, dtype='<i2') for f in frames]
    samples = [samp.astype(np.float64) for samp in samples]
    n = min(map(len, samples))
    mix = samples[0][:n] + samples[1][:n]
    mix_wav = wave.open("./mix.wav", 'w')
    mix_wav.setparams(wavs[0].getparams())
    mix_wav.writeframes(mix.astype('<i2').tobytes())
    mix_wav.close()
Example #27
    def __init__(self, wav_file_path):
        self.fs, samples = read_wav(wav_file_path)
        super(WavPlayer, self).__init__(
            samples if np.isscalar(samples[0]) else samples[:, 0])
Example #28
def r4():
    from scipy.io.wavfile import read as read_wav
    import os
    sampling_rate, data = read_wav(
        "dataset/bubbling/00.wav")  # enter your filename
    print(sampling_rate)
	def __get_fs_array_from_audio(self, audio_file):
		sampling_rate, data=read_wav(audio_file)
		return (sampling_rate, data)
Example #30
import json
import argparse
import numpy as np

from glob import glob
from chirp import Chirp
from beeper import Beeper
from speaker import Speaker
from datetime import datetime
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

import sounddevice as sd
from scipy.io.wavfile import read as read_wav

error_sampling, error_wav = read_wav("error.wav")
def play_error():
    sd.play(error_wav, error_sampling)

ready_sampling, ready_wav = read_wav("ready.wav")
def play_ready():
    sd.play(ready_wav, ready_sampling)

def get_output(mode, guide_tolerance, guide_delay):
    guide = None
    # guide_tolerance = 0.15
    # guide_delay = 0
    if mode == "speaker":
        guide = Speaker(tolerance=guide_tolerance, delay=guide_delay)
    elif mode == "beeper":
        guide = Beeper(tolerance=guide_tolerance, delay=guide_delay)
Example #31
        path_myrecording = f"/Users/yuxinzhu/.spyder-py3/StreamlitSD/{filename}.wav"

        save_record(path_myrecording, myrecording, fs)
        record_state.text(f"Done! Saved sample as {filename}.wav")

        st.audio(read_audio(path_myrecording))

        fig = create_spectrogram(path_myrecording)
        st.pyplot(fig)

        ################ TO OBTAIN THE WEB-RECORDING .WAV SAMPLING RATE
        from scipy.io.wavfile import read as read_wav
        import os
        os.chdir('/Users/yuxinzhu/.spyder-py3/StreamlitSD/')  # change to the file directory
        sampling_rate, data1 = read_wav(
            f"{filename}.wav")  # enter your filename
        print(sampling_rate)
        st.markdown('*Vocal Recording Sampling Rate For:*  **output.wav**')
        st.markdown(sampling_rate)
        ################################
st.write(
    "**NOTE:** Please continue to record even if a *starting error* is shown below. Thank you!"
)

################ TO UPLOAD DATA
st.title("Vocal Sample File Upload")
file_to_be_uploaded = st.file_uploader("Choose an audio file to upload",
                                       type="wav")
file_to_be_uploaded = st.file_uploader("Choose an audio file to upload",
                                       type="mp3")
################################