def handle(self, *args, **options):
    """
    Smoke-test ``read_segment`` by reading every segment of every audio file.

    Audio files are walked in queryset order. A file is skipped (while still
    advancing the counters) when the running total after it would stay below
    ``already_tested``, a name defined outside this method, so a previous
    partial run can be resumed.

    :param args: unused positional arguments
    :param options: unused keyword options
    :return: None. Any failure surfaces as the exception raised by
             ``read_segment``.
    """
    audio_files = AudioFile.objects.all()
    bar = Bar('Testing...', max=Segment.objects.all().count())
    num_tested = 0

    for audio_file in audio_files:
        segments = Segment.objects.filter(audio_file=audio_file)
        num_file_segments = segments.count()

        # Everything in this file was covered by an earlier run
        if num_tested + num_file_segments < already_tested:
            num_tested += num_file_segments
            bar.next(num_file_segments)
            continue

        audio_file_path = wav_path(audio_file)

        # The path is the same for every segment of the file, so test for its
        # existence once; segments of a missing file still count as processed
        # so that the progress bar stays in sync with the total.
        if not os.path.isfile(audio_file_path):
            num_tested += num_file_segments
            bar.next(num_file_segments)
            continue

        for segment in segments:
            read_segment(audio_file_path, beg_ms=segment.start_time_ms, end_ms=segment.end_time_ms, mono=True,
                         normalised=True)
            bar.next()
            num_tested += 1

    bar.finish()
def setUp(self):
    """
    Load the fixtures shared by the tests: the sample rate of the example wav
    file plus a long (100-300 ms) and a short (100-149 ms) mono segment of it,
    both stored as C-contiguous arrays.
    """
    wav = 'tests/example 1.wav'

    fs, _ = wav_2_mono(wav, normalised=False)
    self.fs = fs

    self.long_segment = np.ascontiguousarray(
        wavfile.read_segment(wav, beg_ms=100, end_ms=300, mono=True))

    self.short_segment = np.ascontiguousarray(
        wavfile.read_segment(wav, beg_ms=100, end_ms=149, mono=True))
def _cached_get_segment_audio_data(audio_file_name, database_id, fs, start, end):
    """
    Build an HTTP response containing one segment of a wav file, re-encoded in
    the compressed format named by ``settings.AUDIO_COMPRESSED_FORMAT``.

    :param audio_file_name: name of the audio file, without the ``.wav`` extension
    :param database_id: id used to build the ``audio/wav/<id>`` folder name
    :param fs: frame rate handed to ``pydub.AudioSegment``
    :param start: start of the segment, passed to ``wavfile.read_segment``
    :param end: end of the segment, passed to ``wavfile.read_segment``
    :return: an ``HttpResponse`` whose body is the encoded audio, with
             ``Content-Type`` and ``Content-Length`` set accordingly
    """
    wav_file_path = data_path('audio/wav/{}'.format(database_id), '{}.wav'.format(audio_file_name))
    chunk = wavfile.read_segment(wav_file_path, start, end, normalised=False, mono=True)

    # The raw (non-normalised) samples are handed over as bytes, so the sample
    # width is taken from the array's dtype
    audio_segment = pydub.AudioSegment(chunk.tobytes(), frame_rate=fs, sample_width=chunk.dtype.itemsize,
                                       channels=1)
    audio_segment = _match_target_amplitude(audio_segment)

    # The context manager releases the buffer even if the export fails
    with io.BytesIO() as out:
        audio_segment.export(out, format=settings.AUDIO_COMPRESSED_FORMAT)
        binary_content = out.getvalue()

    response = HttpResponse()
    response.write(binary_content)
    response['Content-Type'] = 'audio/' + settings.AUDIO_COMPRESSED_FORMAT
    response['Content-Length'] = len(binary_content)
    return response
def change_fs_without_resampling(wav_file, new_fs, new_name):
    """
    Create a new wav file with a new (fake) sample rate, without changing the actual data.
    This is necessary if the frequency of the wav file is higher than the maximum sample rate
    that the browser supports

    :param wav_file: path to the original wav file
    :param new_fs: the new sample rate
    :param new_name: path of the wav file to be written
    :return: the path of the faked wav file (i.e. ``new_name``)
    """
    # Only the channel count, block alignment and bit depth are needed from the header
    _, _, num_channels, _, _, block_align, bitrate, _, _ = read_wav_info(wav_file)

    # Read the whole file (end=None) untouched: not normalised and not retyped
    ubyte_data = read_segment(wav_file, 0, None, normalised=False, retype=False)

    nframes_per_channel = ubyte_data.size // block_align

    # `bitrate` is used here as a per-sample bit depth: it is divided by 8 to
    # obtain a byte count and compared against 24 below
    bytes_per_sample = bitrate // 8

    uint8_data = ubyte_data.reshape((nframes_per_channel, num_channels, bytes_per_sample)).astype(np.uint8)

    if bitrate == 24:
        write_24b(new_name, new_fs, uint8_data)
    else:
        write(new_name, new_fs, uint8_data, bitrate=bitrate)

    return new_name
def get_sig(args):
    """
    Obtain the signal described by `args` and band-pass filter it.

    When `wav_file_path` is set the signal is read from that file between
    `start` and `end` (mono, normalised, with `winlen=win_length`); otherwise
    the ready-made signal in `args['sig']` is used.

    :param args: dict of arguments, unpacked with `unroll_args`
    :return: the result of `butter_bandpass_filter(sig, lpf, hpf, fs)`
    """
    wav_file_path, fs, start, end, win_length, lpf, hpf = unroll_args(
        args, ['wav_file_path', 'fs', 'start', 'end', 'win_length', 'lpf', 'hpf'])

    if not wav_file_path:
        raw_sig = args['sig']
    else:
        raw_sig = wavfile.read_segment(wav_file_path, start, end, mono=True, normalised=True,
                                       winlen=win_length)

    return butter_bandpass_filter(raw_sig, lpf, hpf, fs)
def cached_stft(wav_file_path, start, end, nfft, noverlap, win_length, window_name, center):
    """
    Read one mono, normalised segment of a wav file and return its STFT as
    computed by `stft_from_sig`.

    :param wav_file_path: path to the wav file
    :param start: start of the segment, passed to `wavfile.read_segment`
    :param end: end of the segment, passed to `wavfile.read_segment`
    :param nfft, noverlap, win_length, window_name, center: forwarded to `stft_from_sig`
    :return: whatever `stft_from_sig` returns
    """
    # The sample rate comes back alongside the samples but is not needed here
    signal_chunk, _fs = wavfile.read_segment(
        wav_file_path, start, end, normalised=True, mono=True, return_fs=True)

    return stft_from_sig(signal_chunk, nfft, noverlap, win_length, window_name, center)
def extract_spectrogram(audio_file, segs_info):
    """
    Extract raw spectrograms for all segments (not the masked spectrogram from Luscinia)
    of an audio file and save each of them as a PNG image.

    :param audio_file: the audio file object; its wav path is resolved by `wav_path`
    :param segs_info: iterable of `(tid, start, end)` tuples. `tid` selects the output
                      path (via `get_abs_spect_path`); `start` and `end` are passed to
                      `read_segment` as `beg_ms` and `end_ms`
    :return: None
    :raises CustomAssertionError: if the wav file does not exist
    """
    filepath = wav_path(audio_file)

    # Check for the file before trying to read its header, otherwise the
    # friendly error below can never be reached
    if not os.path.isfile(filepath):
        raise CustomAssertionError("File {} not found".format(audio_file.name))

    fs, _ = get_wav_info(filepath)

    for tid, start, end in segs_info:
        seg_spect_path = get_abs_spect_path(tid)
        ensure_parent_folder_exists(seg_spect_path)

        sig = read_segment(filepath, beg_ms=start, end_ms=end, mono=True, normalised=True, return_fs=False,
                           retype=True, winlen=window_size)
        _, _, s = signal.stft(sig, fs=fs, window=window, noverlap=noverlap, nfft=window_size,
                              return_onesided=True)
        spect = np.abs(s * scale)

        height, width = np.shape(spect)
        spect = np.flipud(spect)

        # Map log-magnitudes onto colour-map indices. Infinities (e.g. the log
        # of a zero magnitude) are sent to index 0.
        spect = np.log10(spect)
        spect = ((spect - global_min_spect_pixel) / interval64)
        spect[np.isinf(spect)] = 0
        # `np.int` was only an alias of the builtin and no longer exists in recent NumPy
        spect = spect.astype(int)

        spect = spect.reshape((width * height, ), order='C')
        spect[spect >= 64] = 63

        spect_rgb = np.empty((height, width, 3), dtype=np.uint8)
        spect_rgb[:, :, 0] = cm_red[spect].reshape((height, width)) * 255
        spect_rgb[:, :, 1] = cm_green[spect].reshape((height, width)) * 255
        spect_rgb[:, :, 2] = cm_blue[spect].reshape((height, width)) * 255

        seg_spect_img = Image.fromarray(spect_rgb)
        seg_spect_img.save(seg_spect_path, format='PNG')
        celerylogger.info('spectrogram {} created'.format(seg_spect_path))
def get_sig(args):
    """
    Obtain the signal described by `args`.

    :param args: dict of arguments, unpacked with `unroll_args`. When
                 `wav_file_path` is set, the segment between `start` and `end`
                 is read from that file (mono, normalised); otherwise
                 `args['sig']` is returned as is.
    :return: the signal
    """
    wav_file_path, fs, start, end = unroll_args(
        args, ['wav_file_path', 'fs', 'start', 'end'])

    if not wav_file_path:
        return args['sig']

    return wavfile.read_segment(wav_file_path, start, end, mono=True, normalised=True)
def test_read_segment(self):
    """
    Check the length of what `wavfile.read_segment` returns: for the whole
    file, for segments of various durations and starting points, and for the
    same segments when a window length (`winlen`) is requested.
    """
    wav = 'tests/example 1.wav'
    fs, whole_sig = wav_2_mono(wav, normalised=True)
    n_samples = len(whole_sig)
    max_end_ms = int(np.floor(n_samples / fs * 1000))

    start_points_ms = [0, 1, 2, 20, 30, 100]

    def durations_from(beg_ms):
        # The last duration makes the segment run up to the end of the file
        return [1, 100, 150, 153, 200, max_end_ms - beg_ms]

    # Full duration: end_ms = None, beg_ms = 0
    whole_segment = wavfile.read_segment(wav, mono=True)
    self.assertEqual(n_samples, len(whole_segment))

    # Segments of different lengths from different starting points must have
    # the prescribed duration
    for beg_ms in start_points_ms:
        for length_ms in durations_from(beg_ms):
            segment = wavfile.read_segment(wav, beg_ms=beg_ms, end_ms=beg_ms + length_ms, mono=True)
            self.assertEqual(np.round(len(segment) * 1000 / fs), length_ms)

    # With a window length, the expected number of samples is rounded up to
    # the next multiple of that window length
    framelength = 256
    for beg_ms in start_points_ms:
        for length_ms in durations_from(beg_ms):
            expected_len = int(length_ms * fs / 1000)
            if expected_len % framelength:
                expected_len = np.ceil(expected_len / framelength) * framelength

            padded_segment = wavfile.read_segment(wav, beg_ms=beg_ms, end_ms=beg_ms + length_ms, mono=True,
                                                  winlen=framelength)
            self.assertEqual(expected_len, len(padded_segment))
def wav_2_mono(file, **kwargs):
    """
    Read a wav file and return its sample rate together with the data stream
    of the first channel.

    :param file: the wav file to read
    :param kwargs: forwarded unchanged to `wavfile.read_segment`
    :return: fs and signal
    """
    samples = wavfile.read_segment(file, **kwargs)

    # More than one dimension means several channels: keep the first only
    is_multi_channel = np.ndim(samples) > 1
    first_channel = samples[:, 0] if is_multi_channel else samples

    sample_rate, _ = get_wav_info(file)
    return sample_rate, first_channel
def test_read_segment(self):
    """
    Check the length of what `wavfile.read_segment` returns, for the whole
    file and for segments of various durations and starting points.
    """
    wav = 'tests/example 1.wav'
    fs, whole_sig = wav_2_mono(wav, normalised=True)
    n_samples = len(whole_sig)

    # Full duration: end_ms = None, beg_ms = 0
    whole_segment = wavfile.read_segment(wav, mono=True)
    self.assertEqual(n_samples, len(whole_segment))

    # Segments of different lengths from different starting points must have
    # the prescribed duration
    start_points_ms = [0, 1, 2, 20, 30, 100]
    durations_ms = [100, 150, 153, 200]

    for beg_ms in start_points_ms:
        for length_ms in durations_ms:
            segment = wavfile.read_segment(wav, beg_ms=beg_ms, end_ms=beg_ms + length_ms, mono=True)
            self.assertEqual(np.round(len(segment) * 1000 / fs), length_ms)
def get_sig(args):
    """
    Obtain the signal described by `args`, widening the requested range when
    it is shorter than `win_length`.

    :param args: dict of arguments, unpacked with `unroll_args`. When
                 `wav_file_path` is set, the segment between `start` and `end`
                 is read from that file (mono, normalised); otherwise
                 `args['sig']` is returned as is.
    :return: the signal
    """
    wav_file_path, fs, start, end, win_length = unroll_args(
        args, ['wav_file_path', 'fs', 'start', 'end', 'win_length'])

    # A falsy `end` is left alone; otherwise make sure the range spans at
    # least `win_length`
    if end and end - start < win_length:
        end = start + win_length

    if not wav_file_path:
        return args['sig']

    return wavfile.read_segment(wav_file_path, start, end, mono=True, normalised=True)
def extract_mfcc(wav_file_path, fs, start, end, nfft, noverlap, filepath=None):
    """
    Compute the result of `mfcc` for one mono segment of a wav file.

    :param wav_file_path: path to the wav file
    :param fs: sample rate, forwarded to `mfcc`
    :param start: passed to `wavfile.read_segment` as `beg_ms`
    :param end: passed to `wavfile.read_segment` as `end_ms`
    :param nfft: FFT size; also used as the window length
    :param noverlap: overlap, forwarded to `mfcc`
    :param filepath: if given, the result is pickled to this path instead of
                     being returned
    :return: the value computed by `mfcc` when `filepath` is not given, otherwise None
    """
    segment_sig = wavfile.read_segment(wav_file_path, beg_ms=start, end_ms=end, mono=True)

    # The signal is supplied directly, so the file-related arguments are blanked
    mfcc_args = {
        'nfft': nfft,
        'noverlap': noverlap,
        'win_length': nfft,
        'fs': fs,
        'wav_file_path': None,
        'start': 0,
        'end': None,
        'sig': segment_sig,
        'center': True,
    }
    features = mfcc(mfcc_args)

    if not filepath:
        return features

    with open(filepath, 'wb') as f:
        pickle.dump(features, f)
def extract_xfcc(segments, config, is_pattern=False, method_name='mfcc'):
    """
    Extract cepstral/filterbank features for a collection of segments.

    :param segments: the segments to process. With `is_pattern=True` this must support
                     queryset operations (`values_list`, `annotate`, `order_by`); otherwise
                     it is only iterated and each item's `audio_file`, `start_time_ms`,
                     `end_time_ms` and `id` are read.
    :param config: dict-like of options. Recognised keys: 'lower', 'upper', 'delta', 'nfilt',
                   'nmfcc', 'cepsfunc', and for the gfcc/gfc methods 'lowhear' and 'hihear'.
    :param is_pattern: if True, features are extracted from synthetic chirps generated for each
                       segment's duration and sample rate instead of from the recorded audio.
    :param method_name: one of 'mfcc', 'bfcc', 'lfcc', 'gfcc' (handled by `xfcc`) or
                        'mfc', 'bfc', 'lfc', 'gfc' (handled by `xfc`).
    :return: a numpy array of features, one entry per segment, in the order of `segments`
    :raises Exception: if `method_name` is not one of the names above
    """
    nsegs = len(segments)

    lower = int(config.get('lower', 20))
    upper = int(config.get('upper', 8000))
    ndelta = int(config.get('delta', 0))
    nfilt = int(config.get('nfilt', 26))
    nmfcc = int(config.get('nmfcc', nfilt / 2))

    assert nmfcc <= nfilt
    xtrargs = {'name': method_name, 'lowfreq': lower, 'highfreq': upper, 'numcep': nmfcc, 'nfilt': nfilt}
    if 'cepsfunc' in config:
        xtrargs['cepsfunc'] = config['cepsfunc']

    if method_name in ('mfcc', 'bfcc', 'lfcc', 'gfcc'):
        method = xfcc
    elif method_name in ('mfc', 'bfc', 'lfc', 'gfc'):
        method = xfc
    else:
        raise Exception('No such method: {}'.format(method_name))

    # Only the two 'g' variants take the extra hearing-range arguments
    if method_name in ('gfcc', 'gfc'):
        xtrargs['lowhear'] = int(config.get('lowhear', 500))
        xtrargs['hihear'] = int(config.get('hihear', 12000))

    bar = Bar('Extracting {} Range={}~{}, nCoefs={}, delta={}'.format(method_name, lower, upper, nmfcc, ndelta),
              max=nsegs, suffix='%(index)d/%(max)d %(elapsed)ds/%(eta)ds')

    if is_pattern:
        original_segment_ids = np.array(segments.values_list('id', flat=True), dtype=np.int32)

        # Sort by duration so that we can cache them effectively
        segments = segments.annotate(duration=F('end_time_ms') - F('start_time_ms')).order_by('duration')
        duration_sorted_segment_ids = np.array(segments.values_list('id', flat=True), dtype=np.int32)

        # We need the index array in order to restore the original order:
        # for each position in the original order, find the position of the same id
        # in the duration-sorted order
        ascending_sorted_idx = np.sort(original_segment_ids)
        ascending_sorted_to_original_order = np.searchsorted(ascending_sorted_idx, original_segment_ids)
        duration_sorted_to_ascending_sorted_order = np.argsort(duration_sorted_segment_ids)
        duration_sorted_to_original_order = \
            duration_sorted_to_ascending_sorted_order[ascending_sorted_to_original_order]

        sorted_mfcc = []
        segments_info = segments.values_list('duration', 'audio_file__fs')

        # Segments arrive sorted by duration, so only the features of the current duration
        # are kept. Within one duration the features are keyed by sample rate, because
        # both the chirps and their features depend on it.
        cached_duration = None
        features_by_fs = {}

        for duration, fs in segments_info:
            if duration != cached_duration:
                cached_duration = duration
                features_by_fs = {}

            if fs not in features_by_fs:
                chirps = [
                    generate_chirp(f0_profile_name, amp_profile_name, duration, fs)
                    for amp_profile_name in amp_profile_names
                    for f0_profile_name in f0_profile_names
                ]
                features_by_fs[fs] = [_extract_xfcc(chirp, fs, method, xtrargs, ndelta) for chirp in chirps]

            sorted_mfcc.append(features_by_fs[fs])
            bar.next()

        mfccs = np.array(sorted_mfcc)[duration_sorted_to_original_order]
    else:
        mfccs = []
        segment_data = {}
        for segment in segments:
            fs = segment.audio_file.fs
            file_url = wav_path(segment.audio_file)
            sig = wavfile.read_segment(file_url, segment.start_time_ms, segment.end_time_ms, mono=True)
            mfcc_fts = _extract_xfcc(sig, fs, method, xtrargs, ndelta)

            # Key each entry by the segment's own id (not the `id` builtin)
            segment_data['s' + str(segment.id)] = dict(sig=sig, fs=fs, ft=mfcc_fts)
            mfccs.append(mfcc_fts)
            bar.next()
        mfccs = np.array(mfccs)

        # NOTE(review): this dumps every signal and feature to a hard-coded path and looks
        # like a debugging aid - confirm whether it is still wanted
        import scipy.io as sio
        sio.savemat('/tmp/segment_data.mat', segment_data)

    bar.finish()
    return mfccs