def process_directory(dir, index, library):
    # Relies on module-level globals (path, sampling_rate, augment, n_segments) and on
    # names imported/defined elsewhere in the module (np, random, stft, mfcc, hann,
    # newaxis, MelFilterbanks, Audio, Visualization, _cut, _noise).
    signal, rate = Audio.read(f'{path}/{dir}', sr=sampling_rate, normalize=True)
    signal = np.array(signal)
    segment_time = 5
    # Truncate the audio signal to a multiple of 5 seconds
    signal = signal[:len(signal) - len(signal) % (rate * segment_time)]
    # Count how many segments the audio contains
    segments = len(signal) // (rate * segment_time)
    augsize = int(augment[0]) if len(augment) > 0 else 0
    m = {
        'attrs': [],
        'labels': [index] * (n_segments or segments) * (1 + augsize),
        'classes': [{index: (n_segments or segments) * (1 + augsize)}]
    }

    for i in range(segments):
        if n_segments and i >= n_segments:
            continue
        samples = [Audio.segment(signal, rate, seconds=segment_time, window=i)]

        n_mfcc = 13
        n_mels = 26
        n_fft = 2048
        # Window and hop sizes (in samples)
        hop_length = 512
        win_length = 1024
        # Window and hop sizes (in seconds)
        win_len = win_length / rate
        win_hop = hop_length / rate
        lifter = 22
        fmin = 0
        fmax = rate / 2
        coef_pre_enfase = 0.97
        append_energy = 0

        if augment:
            for _ in range(int(augment[0])):
                flag = False
                aug = samples[0]
                if random.uniform(0, 1) > 0.5 and 'cut' in augment:
                    aug = _cut(aug, rate)
                    flag = True
                if random.uniform(0, 1) > 0.5 and 'noise' in augment:
                    aug = _noise(aug, rate)
                    flag = True
                # If both augmentations were skipped by chance, apply one of them anyway
                if not flag and len(augment) == 3:
                    if random.uniform(0, 1) > 0.5:
                        aug = _cut(aug, rate)
                    else:
                        aug = _noise(aug, rate)
                samples.append(aug)

        for sample_index, sample in enumerate(samples):
            if library == 'stft':
                attr = np.abs(
                    np.array(stft(sample, n_fft=n_fft, hop_length=hop_length)))
            elif library == 'melbanks':
                sample = sample[newaxis, :]
                melfbanks = MelFilterbanks(sample_rate=rate)
                attr = melfbanks(sample)
                attr = np.array(attr).T
            elif library == 'psf':
                attr = mfcc(signal=sample, samplerate=rate, winlen=win_len,
                            winstep=win_hop, numcep=n_mfcc, nfilt=n_mels,
                            nfft=n_fft, lowfreq=fmin, highfreq=fmax,
                            preemph=coef_pre_enfase, ceplifter=lifter,
                            appendEnergy=append_energy, winfunc=hann)
                attr = np.array(attr)
            # Visualization.plot_cepstrals(
            #     attr, fig_name=f'teste.png')
            # Visualization.plot_audio(
            #     sample, rate, fig_name='./teste.png')
            # Audio.write(
            #     f'portuguese/processed/psf/{dir}_{i}_{sample_index}.wav', sample, rate)
            m['attrs'].append(attr.tolist())
            del attr

    del signal
    return m
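# The augmentation helpers _cut and _noise are not defined in this excerpt. Below is a
# minimal sketch of what they could look like, assuming _cut drops a short random slice
# of the segment and _noise adds low-amplitude white noise; the project's actual
# implementations may differ. The sketch reuses the module's numpy (np) and random imports.
def _cut(signal, rate, max_seconds=0.5):
    # Remove a random slice of up to max_seconds and pad back to the original length
    cut_len = random.randint(1, int(rate * max_seconds))
    start = random.randint(0, len(signal) - cut_len)
    kept = np.concatenate([signal[:start], signal[start + cut_len:]])
    return np.pad(kept, (0, len(signal) - len(kept)))


def _noise(signal, rate, noise_factor=0.005):
    # Add white noise scaled to a small fraction of the signal's peak amplitude
    noise = np.random.normal(0, 1, len(signal)) * noise_factor * np.max(np.abs(signal))
    return signal + noise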
model = load(open(filename_holder + 'model.h5', 'rb'))
signal, rate = librosa.load(args['inferencia'], sr=sampling_rate)
# signal = Audio.trim(signal)
segment_time = 5
# Truncate the audio to a multiple of 5 seconds and count the segments
signal = signal[:len(signal) - len(signal) % (rate * segment_time)]
segments = len(signal) // (rate * segment_time)
mfcc_audios = []
for i in range(segments):
    sample = Audio.segment(signal, rate, seconds=segment_time, window=i)

    n_mfcc = 13
    n_mels = 26
    n_fft = 2048
    # Window and hop sizes (in samples)
    hop_length = 512
    win_length = 1024
    # Window and hop sizes (in seconds)
    win_len = win_length / rate
    win_hop = hop_length / rate
    lifter = 22
    fmin = 0
    fmax = rate / 2
    coef_pre_enfase = 0.97
    append_energy = 0
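    # The inference loop is truncated here. A minimal sketch of how it could continue,
    # mirroring the 'psf' branch of process_directory above: extract MFCCs for each
    # segment and collect them. The feature layout the model expects and the predict
    # call below are assumptions, not taken from the original code.
    attr = mfcc(signal=sample, samplerate=rate, winlen=win_len,
                winstep=win_hop, numcep=n_mfcc, nfilt=n_mels,
                nfft=n_fft, lowfreq=fmin, highfreq=fmax,
                preemph=coef_pre_enfase, ceplifter=lifter,
                appendEnergy=append_energy, winfunc=hann)
    mfcc_audios.append(np.array(attr))

# Average the per-segment predictions into one score for the whole file (sketch,
# assuming a Keras-style model exposing a predict method)
predictions = model.predict(np.array(mfcc_audios))
print(predictions.mean(axis=0))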