Code example #1
import wavefile               # python-wavefile: load() returns (samplerate, data)
from scipy import signal
from scipy.io import wavfile  # note the different spelling: scipy's module is "wavfile"

def test_local_files():
    input_signal = wavefile.load(filename="./audio_dataset/test/hi_hat/ALCHH36.WAV")
    impulse_response = wavefile.load(filename="./impulse_responses/spaceEchoIR.wav")  # already a floating-point array
    second_IR = wavefile.load(filename="./impulse_responses/echo2IR.wav")
    # data is indexed [channel][sample]; [1][0] is the first channel of the (rate, data) tuple
    output_signal = signal.fftconvolve(input_signal[1][0], impulse_response[1][0])
    output_signal2 = signal.fftconvolve(output_signal, second_IR[1][0])
    # float2pcm is a project-local helper that converts float samples to integer PCM
    wavfile.write("./audio_dataset/convolved_hihat.wav", 44100, utility.float2pcm(output_signal2))
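Since convolution is associative, the two impulse responses could just as well be combined once and applied in a single pass. A minimal sketch reusing the arrays loaded above (same assumptions and imports):

combined_IR = signal.fftconvolve(impulse_response[1][0], second_IR[1][0])
output_signal2 = signal.fftconvolve(input_signal[1][0], combined_IR)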
Code example #2
def test_save(self):
    samplerate = 44100
    data = self.fourSinusoids(samples=400)
    wavefile.save("file.wav", data, samplerate=samplerate)
    readsamplerate, readdata = wavefile.load("file.wav")
    np_assert_almost_equal(readdata, data, decimal=7)
    self.assertEqual(readsamplerate, samplerate)
Code example #3
def test_save_asCOrder(self):
    samplerate = 44100
    data = self.fourSinusoids(samples=400)
    data = np.ascontiguousarray(data)  # force a C-contiguous memory layout before saving
    wavefile.save("file.wav", data, samplerate=samplerate)
    readsamplerate, readdata = wavefile.load("file.wav")
    np_assert_almost_equal(readdata, data, decimal=7)
    self.assertEqual(readsamplerate, samplerate)
Code example #4
def __init__(self, filename: str):
    wf = wavefile.load(filename=filename)
    self.signal_vector = wf[1]  # wavefile.load returns (samplerate, data)
    if self.signal_vector.shape[0] == 2:    # stereo
        self.left = self.signal_vector[0]
        self.right = self.signal_vector[1]
    elif self.signal_vector.shape[0] == 1:  # mono
        self.mono = self.signal_vector[0]
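python-wavefile returns a (samplerate, data) pair with data shaped (channels, samples), which is why checking shape[0] is enough to distinguish stereo from mono files here.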
Code example #5
import numpy as np
import noisereduce as nr
import soundfile as sf
import wavefile

def reduce_noise(path):
    samplerate, channels = wavefile.load(path)
    data = channels[0]
    # treat the second second of audio as the noise profile (noisereduce 1.x API)
    nr_data = nr.reduce_noise(audio_clip=np.array(data),
                              noise_clip=np.array(data[samplerate:2 * samplerate]),
                              verbose=False)
    # overwrites the original file with the denoised audio
    sf.write(path, nr_data, samplerate)
    return
Code example #6
File: 2.py Project: n1xDev/ml-voice
def getMinMaxAmpl(filename):
    w = wavefile.load(filename)
    signal = w[1][0]  # first channel
    minAmpl = str(abs(signal).min() * 100)  # smallest absolute amplitude, as a percentage
    maxAmpl = str(abs(signal).max() * 100)  # largest absolute amplitude
    return [minAmpl, maxAmpl]
Code example #7
def assertLoadWav(self,
                  filename,
                  expectedData=None,
                  expectedSamplerate=44100,
                  expectedShape=None):
    samplerate, data = wavefile.load(filename)
    if expectedShape is not None:
        self.assertEqual(data.shape, expectedShape)
    if expectedData is not None:
        np_assert_almost_equal(expectedData, data, decimal=7)
    self.assertEqual(expectedSamplerate, samplerate)
Code example #8
File: audition.py Project: steveb/bogt
    def take_action(self, parsed_args):
        self.conf = config.load_config()
        self.tsl = os.path.abspath(parsed_args.tsl)
        if not os.path.exists(self.tsl) or not os.path.isfile(self.tsl):
            raise Exception('TSL file not found: %s' % self.tsl)
        self.tsl_name = os.path.splitext(os.path.basename(self.tsl))[0]

        self.no_record = parsed_args.no_record
        if not parsed_args.no_record:
            if parsed_args.dest:
                dest = parsed_args.dest
            else:
                dest = '%s.d' % parsed_args.tsl
            self.dest = self.prep_dest(dest)
        self.liveset = tsl.load_tsl_from_file(parsed_args.tsl, self.conf)
        sr, d = wavefile.load(parsed_args.sample)
        self.sample_rate = sr
        self.play_data = d.T
        self.session = io.Session(self.conf, fake=parsed_args.no_send)
        self.audition()
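Note the transpose: python-wavefile returns sample data as (channels, samples), so d.T produces the (samples, channels) layout that playback APIs typically expect.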
Code example #9
import wavefile as wv
import noisereduce as nr
from scipy.io.wavfile import write

def reduce_noise(song_file):
    # use the wavefile module to load the wav as float data
    samplerate, channels = wv.load(song_file)
    data = channels[0]

    # select a section of the data that is noise
    noisy_part = data[1500:2000]

    # perform noise reduction (noisereduce 1.x API)
    reduced_noise = nr.reduce_noise(audio_clip=data,
                                    noise_clip=noisy_part,
                                    n_std_thresh=1.5,
                                    prop_decrease=1,
                                    verbose=False)

    write("reduced_noise_file.wav", samplerate, reduced_noise)

    # remember to delete this file after
    return
Code example #10
import math
import numpy
import wavefile
from scipy import signal
from aubio import source, pitch
# freqToBin is a project-local helper that maps a frequency to its periodogram bin

def analyzeHarmonicRatios(grain):
    maxPermissableFreq = 4409  # highest fundamental for which 4 harmonics still fit
    numHarmonics = 4
    w = wavefile.load(grain["file"])
    data = w[1][0]
    s = source(grain["file"], w[0], len(data))
    samplerate = s.samplerate

    # Compute the fundamental using the "yin" algorithm
    pitch_o = pitch("yin", len(data), len(data), samplerate)
    samples, read = s()
    fundamental = pitch_o(samples)[0]

    if fundamental > maxPermissableFreq:
        return None

    # Window the grain and take its periodogram to read energies at the harmonics
    data = data * numpy.hanning(len(data))
    f, Pxx_den = signal.periodogram(data, w[0])
    Pxx_den = 10 * numpy.log10(Pxx_den)

    # Start at the second harmonic (twice the fundamental)
    fundEnergy = Pxx_den[freqToBin(f, fundamental)]
    curHarm = fundamental * 2
    curHarmCount = 0
    ratios = []

    while curHarmCount < numHarmonics:
        ratio = fundEnergy / Pxx_den[freqToBin(f, curHarm)]
        # Do not allow NaNs or infinities, probably caused by zero energy
        if math.isnan(ratio) or math.isinf(ratio):
            print("Ratio " + str(curHarmCount) + " is " + str(ratio))
            return None
        ratios.append(ratio)
        curHarm += fundamental
        curHarmCount += 1

    return ratios
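One caveat: Pxx_den has already been converted to dB by this point, so these "ratios" compare log-scaled magnitudes; a harmonic bin whose raw power is exactly 1 (0 dB) or 0 (minus infinity dB) is what can produce the infinite or NaN values the loop guards against.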
Code example #11
    def normalization_factor(self) -> float:
        return self.step_response().max()

    def as_convolved_filter(self):
        # RENAME
        return self.byte_array

    def raw_signal_channels(self):
        return [self.byte_array]

# Overloads registered on convolve, dispatched by the type of the first argument:
@convolve.register
def _alias0(signal1: numpy.ndarray, signal2: numpy.ndarray) -> numpy.ndarray:
    return signal.fftconvolve(signal1, signal2)

@convolve.register
def _alias3(filter1: MonoFilter, signal2: numpy.ndarray) -> numpy.ndarray:
    return convolve(filter1.byte_array, signal2)

input_signal = wavefile.load(filename="./audio_dataset/test/hi_hat/ALCHH36.WAV")
impulse_response = wavefile.load(filename="./impulse_responses/spaceEchoIR.wav")  # already a floating-point array
second_IR = wavefile.load(filename="./impulse_responses/echo2IR.wav")
left = input_signal[1]
wv = wavefile.load("/users/usuario/Desktop/bad.wav")
(trackleft, trackright) = wv[1]  # stereo data is (channels, samples)

irleft = impulse_response[1]
mf = MonoFilter(irleft)
#    convolve(trackleft, mf)
#    convolve(mf, trackleft)
[irrealleft] = irleft  # unpack the single channel
convolve(trackleft, irrealleft)
convolve(MonoFilter(irrealleft), trackleft)
convolve(trackleft, MonoFilter(irrealleft))
mf = MonoFilter(irrealleft)
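If convolve is a functools.singledispatch function, as the register decorators suggest, dispatch happens on the type of the first argument only; that is why separate overloads exist for the ndarray-first and MonoFilter-first call orders tried above.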
Code example #12
File: embed.py Project: MeMAD-project/trecvid-vsum
        help="Model file.")
    args = parser.parse_args()

    # ===============================================
    #           Feature extraction
    # ===============================================

    # Get the type of the signal file
    file_name = args.signal.split("/")[-1]
    file_format = file_name.split(".")[-1]  # take the last dot-separated piece as the extension

    # Load signal - for now, only works with wav or numpy files
    if file_format == "npy":
        signal = np.load(args.signal)
    else:
        (rate, sig) = wavefile.load(args.signal)
        signal = sig[0]

    # Frame and compute MFCCs (for now, only a 16 kHz sampling rate can be used)
    S = np.transpose(frame(signal, int(args.frame_len * 16), int(args.hop_len * 16)))
    X = list(map(lambda s: feature_extractor(s, 16000), S))
    X = np.array(np.swapaxes(X, 1, 2))
    # Compress to save memory; 16-bit MFCCs were also used to train current_best.h5
    X = X.astype(np.float16)
    num_timesteps = X.shape[1]

    # ===============================================
Code example #13
def wav_to_floats(filename):
    w = wavefile.load(filename)
    return w[1][0]  # first channel of the (samplerate, data) tuple, as floats
Code example #14
import numpy as np
import wavefile

# Let's set up some synthetic audio:

def sinusoid(samples, f, samplerate=44100):
    return np.sin(np.linspace(0, 2*np.pi*f*samples/samplerate, samples))[:,np.newaxis]

def channels(*args):
    return np.hstack(args).T

audio = channels(
    sinusoid(100000,  440),
    sinusoid(100000,  880),
    sinusoid(100000, 1760),
)

# This is how you save it
wavefile.save("sinusoid.wav", audio, 44100)

# And this is how you load it again
loadedsamplerate, loaded = wavefile.load("sinusoid.wav")

print("Loaded audio has shape", loaded.shape)

channel1, channel2, channel3 = loaded
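Because the data is stored as (channels, samples), the loaded array has shape (3, 100000), which is why it unpacks directly into three channel arrays. A quick sanity check:

assert loaded.shape == (3, 100000)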




Code example #15
def getSignal(utterance):
    samplerate, signal = wavefile.load(utterance)
    print(signal)
    signal = signal[0]
    #print(utterance, 'dtype:', signal.dtype, 'min:', min(signal), 'max:', max(signal), 'samplerate:', samplerate)
    return signal, samplerate
Code example #16
def test_load(self):
    data = self.fourSinusoids(samples=400)
    self.writeWav("file.wav", data)
    readsamplerate, readdata = wavefile.load("file.wav")
    np_assert_almost_equal(readdata, data, decimal=7)
    self.assertEqual(readsamplerate, 44100)
Code example #17
def transcript_label_generator(audio_file, paths):

    # Audio
    os.chdir(paths[0])
    (rate, sig) = wavefile.load(audio_file)

    # Words to be excluded
    bad_words = [[], ["uh", "huh", "uh-huh", "uh_huh"]]
    bad_commas = [None, ".", ",", "?"]

    tc = np.zeros((len(sig[0]), 2))

    for j in np.arange(2):

        os.chdir(paths[j + 1])
        audio_id = audio_file.split(".")[0]
        tc_files = glob.glob(audio_id + "*")

        for i, file in enumerate(tc_files):
            tree = ET.parse(file)
            root = tree.getroot()

            # Speaker indexing
            speaker = i + 1

            for child in root:
                v = child.attrib
                word = child.text

                # Determine if word is excluded
                excword = exclude_word(v, word, bad_commas + bad_words[j])
                if excword is True:
                    continue
                word, start, end = excword

                # Mark indices with overlap
                temp_sig = tc[start:end, j]
                ol_indices = np.where(temp_sig != 0)[0] + start
                tc[ol_indices, j] = -1

                # Individual speaker indices
                is_indices = np.where(temp_sig == 0)[0] + start
                tc[is_indices, j] = speaker

    sig = sig[0]

    # Initialize final transcriptions
    vad_tc = np.zeros((len(sig)))

    # Intersection of segments with one speaker
    os_indices_words = np.where(tc[:, 0] > 0)[0]
    os_indices_ASR = np.where(tc[:, 1] > 0)[0]
    os_indices = np.intersect1d(os_indices_words, os_indices_ASR)

    # Intersection of segments with multiple speakers
    ms_indices_words = np.where(tc[:, 0] == -1)[0]
    ms_indices_ASR = np.where(tc[:, 1] == -1)[0]
    ms_indices = np.intersect1d(ms_indices_words, ms_indices_ASR)

    # Concatenation + VAD
    vad_tc[os_indices] = tc[os_indices, 0]
    vad_tc[ms_indices] = tc[ms_indices, 0]
    vad_indices = np.where(vad_tc != 0)[0]
    vad_tc = vad_tc[vad_indices]
    sig = sig[vad_indices]

    transcript = vad_tc

    return sig, transcript
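For reference, tc holds one row per audio sample and one column per annotation source: entries are a positive speaker index, -1 where overlapping speech was detected, and 0 where no word is marked. The intersections above therefore keep only the samples on which both sources agree before the final voice-activity trimming.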
Code example #18
        "5539381671692122744.mp4": [],
        "5542003749222140011.mp4": [],
        "5544574287152993687.mp4": [],
        "5544620672795594434.mp4": [],
        "5547193787702629969.mp4": [],
        "5549784941472309008.mp4": [],
        "5552368364300855101.mp4": [],
        "5555325449284154780.mp4": [],
        "5555360238519252381.mp4": []
    }
    datapath = f"./data/{d}"
    wavs = [f.name for f in os.scandir(datapath) if f.name.endswith(".wav")]
    wavs.sort()
    for wavfile in wavs:
        print(f"Diarizing file {wavfile} now.")
        (rate, sig) = wavefile.load(f"{datapath}/{wavfile}")
        signal = sig[0]
        S = np.transpose(frame(signal, int(2000 * 16), int(500 * 16)))
        X = list(map(lambda s: fe(s, 16000), S))
        X = np.array(np.swapaxes(X, 1, 2))
        X = X.astype(np.float16)
        num_timesteps = X.shape[1]

        if num_timesteps != 201:
            emb_model.layers.pop(0)
            new_input = Input(batch_shape=(None, num_timesteps, 30))
            new_output = emb_model(new_input)
            emb_model = Model(new_input, new_output)

        embs = emb_model.predict(X)
        try:
Code example #19

maximum_length = 800000  # this is about the maximum length
labels = {"snare": 1, "kick": 2, "hi_hat": 3}

# load training data
training_labels = []
training_values = []

file_directory = './audio_dataset/train/hi_hat'
file_list = [
    f for f in os.listdir(file_directory)
    if os.path.isfile(os.path.join(file_directory, f)) and (f != '.DS_Store')
]
for fname in file_list:
    imported_wave = wavefile.load(filename=file_directory + "/" + fname)
    mono_channel = imported_wave[1][0]  # we want the left channel, or mono
    #mono_channel = numpy.concatenate((hp_filter.convolve(mono_channel), lp_filter.convolve(mono_channel), bp_filter.convolve(mono_channel)))
    mono_channel = complex_coefficients(mono_channel)
    # TODO: do this in one pass as part of complex_coefficients; pass the normalization factor
    normalized_channel = numpy.array(mono_channel) / 50
    training_labels.append(labels["hi_hat"])
    training_values.append(padded(normalized_channel, maximum_length))
    print("done")

file_directory = './audio_dataset/train/snare'
file_list = [
    f for f in os.listdir(file_directory)
    if os.path.isfile(os.path.join(file_directory, f)) and (f != '.DS_Store')
]