def set_name(cls, objs, name, extras=None):
    """Rename a single AudioFile, keeping its on-disk files in sync.

    :param objs: iterable that must contain exactly one AudioFile
    :param name: the new name; must be unique within the file's database
    :param extras: unused in this method; default changed from a shared
        mutable dict literal to None (mutable-default-argument pitfall)
    :raises CustomAssertionError: if more than one object is given, the name
        is taken, or the on-disk rename fails
    """
    if extras is None:
        extras = {}
    if len(objs) != 1:
        raise CustomAssertionError('Can\'t set the same name to more than 1 song.')
    obj = objs[0]

    is_unique = not AudioFile.objects.filter(database=obj.database, name=name).exists()
    if not is_unique:
        raise CustomAssertionError('File {} already exists'.format(name))

    # If audio file is original, change the actual audio files' names as well
    if obj.is_original():
        old_name = obj.name
        old_name_wav = wav_path(obj)
        old_name_compressed = audio_path(obj, settings.AUDIO_COMPRESSED_FORMAT)
        try:
            obj.name = name
            obj.save()
            new_name_wav = wav_path(obj)
            new_name_compressed = audio_path(obj, settings.AUDIO_COMPRESSED_FORMAT)
            os.rename(old_name_wav, new_name_wav)
            os.rename(old_name_compressed, new_name_compressed)
        except Exception as e:
            # Roll back the DB rename. NOTE(review): if the first os.rename
            # succeeded and the second failed, the wav file is NOT moved back
            # -- confirm whether that partial state is acceptable.
            obj.name = old_name
            obj.save()
            # Chain the original exception so the real cause stays visible
            raise CustomAssertionError('Error changing name') from e
    else:
        obj.name = name
        obj.save()
def delete_audio_files_async(*args, **kwargs): audio_files = AudioFile.fobjs.filter(active=False) # Mark all segments belong to these audio files as to be deleted, then delete them segments = Segment.objects.filter(audio_file__in=audio_files) segments.update(active=False) delete_segments_async() # Now delete the audio files audio_files_ids = audio_files.values_list('id', flat=True) ExtraAttrValue.objects.filter(attr__klass=AudioFile.__name__, owner_id__in=audio_files_ids).delete() # If the audio file is not original - just delete the model # Otherwise, search if there are clones. If there are, make one of the clones the new original # If there is no clone, delete the real audio files (wav and mp4) for af in audio_files: if af.original is None: clones = AudioFile.objects.filter(original=af).order_by('id') first_clone = clones.first() # If there are clones, make the first clone original of the remaining # Also move the real audio file to the database's folder of the clone if first_clone: old_wav_file = wav_path(af) old_mp4_file = audio_path(af, settings.AUDIO_COMPRESSED_FORMAT) clones.update(original=first_clone) first_clone.original = None first_clone.save() new_wav_file = wav_path(first_clone) new_mp4_file = audio_path(first_clone, settings.AUDIO_COMPRESSED_FORMAT) os.rename(old_wav_file, new_wav_file) os.rename(old_mp4_file, new_mp4_file) # Otherwise, delete the audio files too else: wav = wav_path(af) mp4 = audio_path(af, settings.AUDIO_COMPRESSED_FORMAT) if os.path.isfile(wav): os.remove(wav) if os.path.isfile(mp4): os.remove(mp4) af.delete()
def spect_from_seg(seg, extractor):
    """Compute the spectrogram of one segment with the supplied extractor."""
    audio_file = seg.audio_file
    return extractor(
        wav_path(audio_file),
        fs=audio_file.fs,
        start=seg.start_time_ms,
        end=seg.end_time_ms,
    )
def handle(self, testfile, fmt, *args, **options):
    """Compress wav audio to `fmt`: one test file, or all original AudioFiles."""
    if testfile is not None:
        # One-off sanity check on a user-supplied file; clean up afterwards
        target_file_path = '/tmp/test-compress-wav.' + fmt
        convert(dict(wav=testfile, other=(fmt, target_file_path)))
        os.remove(target_file_path)
        return

    # Build the full conversion plan first so the progress bar has a total
    conversion_list = []
    for af in AudioFile.objects.filter(original=None):
        scheme = dict(wav=wav_path(af))
        scheme['other'] = (fmt, audio_path(af, fmt))
        conversion_list.append(scheme)

    bar = Bar('Converting song ...', max=len(conversion_list))
    for scheme in conversion_list:
        convert(scheme, print_stats=False)
        bar.next()
    bar.finish()
def extract_syllables(database_name, spect_dir, format):
    """Export one spectrogram file per segment of a database into spect_dir."""
    database = get_or_error(Database, dict(name__iexact=database_name))
    segments = Segment.objects.filter(audio_file__database=database)
    extractor = extractors[format]

    # Group (id, start_ms, end_ms) triplets by their owning audio file so
    # each wav is opened once
    audio_file_dict = {}
    for seg in segments:
        audio_file_dict.setdefault(seg.audio_file, []).append(
            (seg.id, seg.start_time_ms, seg.end_time_ms))

    bar = Bar('Exporting segments ...', max=len(segments))
    for af, info in audio_file_dict.items():
        wav_file_path = wav_path(af)
        fs = af.fs
        for sid, start, end in info:
            spect_path = os.path.join(spect_dir, '{}.{}'.format(sid, format))
            # Skip segments already exported in a previous run
            if not os.path.isfile(spect_path):
                extractor(wav_file_path, fs, start, end, spect_path)
            bar.next()
    bar.finish()
def extract_spectrogram(audio_file_id):
    """
    Extract raw spectrograms for all segments (not the masked spectrogram from Luscinia)
    of an audio file, writing one PNG per segment that is missing on disk.

    :param audio_file_id: id of the AudioFile to process
    :return: None (writes PNG files as a side effect)
    """
    audio_file = AudioFile.objects.get(id=audio_file_id)
    segs_info = Segment.objects.filter(audio_file=audio_file).values_list(
        'tid', 'start_time_ms', 'end_time_ms')

    # Only (re)compute segments whose spectrogram image does not exist yet
    missing_segs_info = []
    for tid, start, end in segs_info:
        seg_spect_path = spect_fft_path(tid, 'syllable')
        ensure_parent_folder_exists(seg_spect_path)
        if not os.path.isfile(seg_spect_path):
            missing_segs_info.append((seg_spect_path, start, end))

    if len(missing_segs_info) > 0:
        filepath = wav_path(audio_file)
        fs, sig = wav_2_mono(filepath)
        duration_ms = len(sig) * 1000 / fs

        # One STFT over the whole file; segments are cropped from it below
        _, _, s = signal.stft(sig, fs=fs, window=window, noverlap=noverlap,
                              nfft=window_size, return_onesided=True)
        file_spect = np.abs(s * scale)

        height, width = np.shape(file_spect)
        file_spect = np.flipud(file_spect)

        # Quantise log-magnitudes into 64 colourmap bins
        file_spect = np.log10(file_spect)
        file_spect = ((file_spect - global_min_spect_pixel) / interval64)
        file_spect[np.isinf(file_spect)] = 0
        # BUGFIX: np.int was removed in NumPy 1.24 -- it was an alias of the
        # builtin int, so astype(int) preserves the original behaviour
        file_spect = file_spect.astype(int)

        file_spect = file_spect.reshape((width * height, ), order='C')
        file_spect[file_spect >= 64] = 63
        file_spect_rgb = np.empty((height, width, 3), dtype=np.uint8)
        file_spect_rgb[:, :, 0] = cm_red[file_spect].reshape((height, width)) * 255
        file_spect_rgb[:, :, 1] = cm_green[file_spect].reshape((height, width)) * 255
        file_spect_rgb[:, :, 2] = cm_blue[file_spect].reshape((height, width)) * 255

        for path, start, end in missing_segs_info:
            # Map the segment's [start, end] ms range onto spectrogram columns
            roi_start = int(start / duration_ms * width)
            roi_end = int(np.ceil(end / duration_ms * width))
            seg_spect_rgb = file_spect_rgb[:, roi_start:roi_end, :]
            seg_spect_img = Image.fromarray(seg_spect_rgb)
            seg_spect_img.save(path, format='PNG')
def extract_segment_features_for_segments(task, sids, features, f2bs, force=False):
    """Compute feature values for the given segment ids, skipping tids already
    present in binary storage unless `force` is set.

    :param task: progress-reporting object with start(limit=...) and tick()
    :param sids: segment ids, in the order results should correspond to
    :param features: iterable of feature objects to extract
    :param f2bs: maps each feature to its (index_filename, value_filename) pair
    :param force: recompute everything when True
    :return: (tids array in sids order, {feature: (tids, fvals)})
    """
    # Preserve the caller-supplied sid order when materialising the queryset
    preserved = Case(*[When(id=id, then=pos) for pos, id in enumerate(sids)])
    segments = Segment.objects.filter(id__in=sids).order_by(preserved)
    tids = np.array(segments.values_list('tid', flat=True), dtype=np.int32)

    f2tid2fvals = {}
    f2af2segments = {}
    n_calculations = 0

    for feature in features:
        index_filename, value_filename = f2bs[feature]
        if force:
            tids_target = tids
        else:
            # Only compute tids that are not yet in this feature's binary index
            existing_tids = binstorage.retrieve_ids(index_filename)
            sorted_ids, sort_order = np.unique(existing_tids, return_index=True)
            non_existing_idx = np.where(np.logical_not(np.isin(tids, sorted_ids)))
            missing_tids = tids[non_existing_idx]
            tids_target = missing_tids

        # Group target segments by audio file so each wav is read once
        af_to_segments = {}
        for segment in segments.order_by('audio_file', 'start_time_ms'):
            tid = segment.tid
            af = segment.audio_file
            if tid in tids_target:
                if af not in af_to_segments:
                    af_to_segments[af] = []
                af_to_segments[af].append((tid, segment.start_time_ms, segment.end_time_ms))

        f2af2segments[feature] = af_to_segments
        n_calculations += len(af_to_segments)

    if n_calculations:
        task.start(limit=n_calculations)
    for feature, af_to_segments in f2af2segments.items():
        _tids = []
        _fvals = []
        for af, segs_info in af_to_segments.items():
            wav_file_path = wav_path(af)
            __tids, __fvals = extract_segment_feature_for_audio_file(wav_file_path, segs_info, feature)
            _tids += __tids
            _fvals += __fvals
            task.tick()
        f2tid2fvals[feature] = (_tids, _fvals)

    return tids, f2tid2fvals
def handle(self, *args, **options):
    """Read every segment of every audio file to verify the wav data is readable."""
    audio_files = AudioFile.objects.all()
    total_segments = Segment.objects.all().count()
    num_tested = 0
    bar = Bar('Testing...', max=total_segments)

    for audio_file in audio_files:
        segments = Segment.objects.filter(audio_file=audio_file)
        seg_count = segments.count()

        # Fast-forward past files fully covered by a previous run
        if num_tested + seg_count < already_tested:
            num_tested += seg_count
            bar.next(seg_count)
            continue

        audio_file_path = wav_path(audio_file)
        for segment in segments:
            if os.path.isfile(audio_file_path):
                read_segment(audio_file_path, beg_ms=segment.start_time_ms,
                             end_ms=segment.end_time_ms, mono=True, normalised=True)
            bar.next()
            num_tested += 1
    bar.finish()
def extract_spectrogram(audio_file, segs_info):
    """
    Extract raw spectrograms for the given segments (not the masked spectrogram
    from Luscinia) of an audio file and save each as a PNG.

    :param audio_file: the AudioFile model instance
    :param segs_info: iterable of (tid, start_ms, end_ms) triplets
    :raises CustomAssertionError: if the wav file is missing on disk
    :return: None (writes PNG files as a side effect)
    """
    filepath = wav_path(audio_file)
    # BUGFIX: check file existence BEFORE get_wav_info -- otherwise a missing
    # file raises a low-level error from get_wav_info instead of the intended
    # CustomAssertionError
    if not os.path.isfile(filepath):
        raise CustomAssertionError("File {} not found".format(audio_file.name))
    fs, duration = get_wav_info(filepath)

    for tid, start, end in segs_info:
        seg_spect_path = get_abs_spect_path(tid)
        ensure_parent_folder_exists(seg_spect_path)

        sig = read_segment(filepath, beg_ms=start, end_ms=end, mono=True, normalised=True,
                           return_fs=False, retype=True, winlen=window_size)
        _, _, s = signal.stft(sig, fs=fs, window=window, noverlap=noverlap,
                              nfft=window_size, return_onesided=True)
        spect = np.abs(s * scale)

        height, width = np.shape(spect)
        spect = np.flipud(spect)

        # Quantise log-magnitudes into 64 colourmap bins
        spect = np.log10(spect)
        spect = ((spect - global_min_spect_pixel) / interval64)
        spect[np.isinf(spect)] = 0
        # BUGFIX: np.int was removed in NumPy 1.24 (it aliased builtin int)
        spect = spect.astype(int)

        spect = spect.reshape((width * height, ), order='C')
        spect[spect >= 64] = 63
        spect_rgb = np.empty((height, width, 3), dtype=np.uint8)
        spect_rgb[:, :, 0] = cm_red[spect].reshape((height, width)) * 255
        spect_rgb[:, :, 1] = cm_green[spect].reshape((height, width)) * 255
        spect_rgb[:, :, 2] = cm_blue[spect].reshape((height, width)) * 255

        seg_spect_img = Image.fromarray(spect_rgb)
        seg_spect_img.save(seg_spect_path, format='PNG')
        celerylogger.info('spectrogram {} created'.format(seg_spect_path))
def extract_psd(extractor, audio_file):
    """
    Extract audio file's spectrogram given its ID

    :param audio_file:
    :param extractor:
    :return: the normalised spectrogram (spectrogram - wise, not dimension wise)
    """
    spect = extractor(wav_path(audio_file), audio_file.fs, 0, None)
    lo = spect.min()
    hi = spect.max()
    return (spect - lo) / (hi - lo)
def spect_from_seg(seg, extractor):
    """Extract this segment's spectrogram using its database's STFT settings."""
    audio_file = seg.audio_file
    db = audio_file.database
    return extractor(
        wav_path(audio_file),
        fs=audio_file.fs,
        start=seg.start_time_ms,
        end=seg.end_time_ms,
        nfft=db.nfft,
        noverlap=db.noverlap,
    )
def handle(self, dbs, *args, **options):
    """Fix stale fs/length values on AudioFile rows, then cross-check imported
    PCM data from the legacy Luscinia databases against the wav files on disk.

    :param dbs: database connection spec understood by get_dbconf
    """
    # Correct false wav info stored on AudioFile rows
    for af in AudioFile.objects.all():
        wav_file_path = wav_path(af, 'wav')
        fs, length = get_wav_info(wav_file_path)
        if fs != af.fs or length != af.length:
            print('Correct file {}, originally length={} fs={}, now length={}, fs={}'
                  .format(af.name, af.length, af.fs, length, fs))
            af.fs = fs
            af.length = length
            af.save()

    conns = None
    try:
        conns = get_dbconf(dbs)
        for pop in conns:
            conn = conns[pop]
            cur = conn.cursor()
            bitrate = 16
            song_cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
            song_cur.execute(
                'select w.framesize, w.stereo, w.samplerate, w.ssizeinbits, w.songid, s.name '
                'from wavs w join songdata s on w.songid=s.id where w.ssizeinbits={}'
                .format(bitrate))
            songs = song_cur.fetchall()
            for song in songs:
                song_name = song['name']
                # Import WAV data and save as WAV and MP3 files
                wav_file_path = '/tmp/{}'.format(song_name)
                mp3_file_path = '/tmp/{}.mp3'.format(song_name)
                fs, length = import_pcm(song, cur, song_name, wav_file_path, mp3_file_path)
                fs1, length1 = get_wav_info(wav_file_path)
                if fs != fs1 or length != length1:
                    print('-------SHIT--------')
                    print('Song {} length = {} fs = {} time = {}, length1 = {} fs1 = {} time1 = {}'
                          .format(song_name, length, fs, length / fs, length1, fs1, length1 / fs1))
    finally:
        # BUGFIX: if get_dbconf() itself raised, conns is still None and
        # iterating it here raised TypeError, masking the original exception
        if conns is not None:
            for dbconf in conns:
                conn = conns[dbconf]
                if conn is not None:
                    conn.close()
def import_pcm(song, cur, audio_file, wav_file_path=None, compressed_url=None):
    """Materialise a legacy-database song's raw PCM as a wav file (and a
    compressed copy), creating each file only if it does not already exist.

    :param song: dict-like row with 'songid', 'stereo', 'ssizeinbits', 'samplerate'
    :param cur: DB cursor used to fetch the raw PCM blob
    :param audio_file: used to derive default output paths
    :param wav_file_path: target wav path; derived from audio_file when None
    :param compressed_url: target compressed path; derived when None
    :return: (fs, length) of the wav data
    """
    if wav_file_path is None:
        wav_file_path = wav_path(audio_file)
    if compressed_url is None:
        compressed_url = audio_path(audio_file, settings.AUDIO_COMPRESSED_FORMAT)

    if not os.path.isfile(wav_file_path):
        # print('Importing {}'.format(song_name))
        song_id = song['songid']
        cur.execute('select wav from wavs where songid={};'.format(song_id))
        data = cur.fetchone()
        raw_pcm = str_to_bytes(data[0])

        nchannels = song['stereo']
        bitrate = int(song['ssizeinbits'])
        fs = int(song['samplerate'])

        # Frame bookkeeping: length is in frames per channel
        byte_per_frame = int(bitrate / 8)
        nframes_all_channel = int(len(raw_pcm) / byte_per_frame)
        nframes_per_channel = int(nframes_all_channel / nchannels)
        length = nframes_per_channel

        ensure_parent_folder_exists(wav_file_path)

        if bitrate == 24:
            # 24-bit audio cannot go through pydub; write the packed bytes directly
            array1 = np.frombuffer(raw_pcm, dtype=np.ubyte)
            array2 = array1.reshape((nframes_per_channel, nchannels, byte_per_frame)).astype(np.uint8)
            wf.write_24b(wav_file_path, fs, array2)
        else:
            # NOTE(review): array code 'i' is 4 bytes regardless of
            # byte_per_frame -- presumably only 16-bit data reaches this branch
            # (the caller filters ssizeinbits=16); confirm before reuse
            data = array.array('i', raw_pcm)
            sound = pydub.AudioSegment(data=data, sample_width=byte_per_frame,
                                       frame_rate=fs, channels=nchannels)
            sound.export(wav_file_path, 'wav')
    else:
        fs, length = get_wav_info(wav_file_path)

    # Create the compressed companion file if it is missing
    if not os.path.isfile(compressed_url):
        ensure_parent_folder_exists(compressed_url)
        sound = pydub.AudioSegment.from_wav(wav_file_path)
        sound.export(compressed_url, format=settings.AUDIO_COMPRESSED_FORMAT)

    return fs, length
def handle(self, *args, **options):
    """Export each segment listed in the label CSV as its own wav file."""
    segment_csv = options['segment_csv']
    folder = options['folder']
    mkdirp(folder)

    with open(segment_csv, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f, delimiter='\t')
        supplied_fields = reader.fieldnames
        # The first field is always id, the second field is always the primary label type
        primary_label_level = supplied_fields[1]
        sid_to_label = {int(row['id']): row[primary_label_level] for row in reader}

    sids = sid_to_label.keys()

    # Group (id, start_ms, end_ms) triplets by audio file so each wav is
    # decoded only once
    audio_file_dict = {}
    for segment in Segment.objects.filter(id__in=sids):
        triplets = audio_file_dict.setdefault(segment.audio_file, [])
        triplets.append((segment.id, segment.start_time_ms, segment.end_time_ms))

    bar = Bar('Exporting segments ...', max=len(sid_to_label))
    for af, triplets in audio_file_dict.items():
        fullwav = pydub.AudioSegment.from_wav(wav_path(af))
        for sid, start, end in triplets:
            out_path = os.path.join(folder, '{}.wav'.format(sid))
            with open(out_path, 'wb') as out:
                fullwav[start:end].export(out, format='wav')
            bar.next()
    bar.finish()
def handle(self, testfile, tofile, fmt, *args, **options):
    """Convert wav audio to a compressed format: one test file or all originals."""
    if fmt is None:
        if tofile is None:
            raise Exception("Either format or --to-file must be provided")
        # Infer the format from the output file's extension
        fmt = tofile.split('.')[-1]

    print("Format = {}".format(fmt))

    if testfile is not None:
        if tofile is None:
            # Throwaway conversion just to exercise the pipeline
            target = '/tmp/test-compress-wav.' + fmt
            convert(dict(wav=testfile, other=(fmt, target)))
            os.remove(target)
        else:
            convert(dict(wav=testfile, other=(fmt, tofile)))
        return

    # Bulk mode: plan every conversion first so the bar has a total
    schemes = []
    for af in AudioFile.objects.filter(original=None):
        schemes.append(dict(wav=wav_path(af), other=(fmt, audio_path(af, fmt))))

    bar = Bar('Converting song ...', max=len(schemes))
    for scheme in schemes:
        convert(scheme, print_stats=False)
        bar.next()
    bar.finish()
def extract_psd(extractor, audio_file, normalise=True):
    """
    Extract audio file's spectrogram given its ID

    :param extractor: callable(wav_path, fs, start, end, nfft, noverlap) -> spectrogram
    :param audio_file: AudioFile whose database supplies the nfft/noverlap settings
    :param normalise: when True, rescale the spectrogram to [0, 1]
    :return: the spectrogram, normalised spectrogram-wise (not dimension-wise)
        when requested; the raw spectrogram otherwise
    """
    wav_file_path = wav_path(audio_file)
    database = audio_file.database
    spect = extractor(wav_file_path, audio_file.fs, start=0, end=None,
                      nfft=database.nfft, noverlap=database.noverlap)
    if not normalise:
        return spect
    # min/max moved inside the branch: the original computed spect_min even
    # when normalise was False, wasting a full pass over the array
    spect_min = np.min(spect)
    spect_max = np.max(spect)
    return (spect - spect_min) / (spect_max - spect_min)
def extract_segment_features_for_segments(runner, sids, features, force=False):
    """Compute feature values for the given segment ids and persist them to
    binary storage in batches, skipping tids already stored unless `force`.

    :param runner: progress object with start(limit=...) and tick(n)
    :param sids: segment ids to process
    :param features: iterable of feature objects
    :param force: recompute everything when True
    :return: None (results are written to binary storage as a side effect)
    """
    segments = Segment.objects.filter(id__in=sids)
    tids = np.array(segments.values_list('tid', flat=True), dtype=np.int32)
    if len(tids) == 0:
        return
    tid_min = tids.min()
    tid_max = tids.max()

    storage_loc_template = get_storage_loc_template()

    f2af2segments = {}
    n_calculations = 0

    for feature in features:
        storage_loc = storage_loc_template.format(feature.name)
        mkdirp(storage_loc)

        if force:
            tids_target = tids
        else:
            # Only compute tids missing from this feature's storage range
            existing_tids = bs.retrieve_ids(storage_loc, (tid_min, tid_max))
            sorted_ids, sort_order = np.unique(existing_tids, return_index=True)
            non_existing_idx = np.where(np.logical_not(np.isin(tids, sorted_ids)))
            missing_tids = tids[non_existing_idx]
            tids_target = missing_tids

        # Group target segments (plus their databases' STFT/filter settings)
        # by audio file id so each wav is read once
        af_to_segments = {}
        vl = segments.filter(tid__in=tids_target).order_by('audio_file', 'start_time_ms')\
            .values_list('tid', 'audio_file', 'start_time_ms', 'end_time_ms', 'audio_file__database__nfft',
                         'audio_file__database__noverlap', 'audio_file__database__lpf', 'audio_file__database__hpf')
        if len(vl):
            for tid, afid, start_time_ms, end_time_ms, nfft, noverlap, lpf, hpf in vl:
                if afid not in af_to_segments:
                    af_to_segments[afid] = []
                af_to_segments[afid].append((tid, start_time_ms, end_time_ms, nfft, noverlap, lpf, hpf))
            f2af2segments[feature] = af_to_segments
            n_calculations += len(tids_target)

    if n_calculations:
        runner.start(limit=n_calculations)
    for ind, (feature, af_to_segments) in enumerate(f2af2segments.items()):
        _tids = []
        _fvals = []
        storage_loc = storage_loc_template.format(feature.name)
        afids = list(af_to_segments.keys())
        af_lookup = {x.id: x for x in AudioFile.objects.filter(id__in=afids)}
        for afid, segs_info in af_to_segments.items():
            af = af_lookup[afid]
            wav_file_path = wav_path(af)
            try:
                __tids, __fvals = extract_segment_feature_for_audio_file(wav_file_path, segs_info, feature)
            except Exception as e:
                raise Exception('Error extracting [{}] for file {}. Error message: {}'.
                                format(feature.name, af.name, str(e)))
            _tids += __tids
            _fvals += __fvals
            # Flush accumulated values to storage in batches of >= 100 tids
            if len(_tids) >= 100:
                bs.store(_tids, _fvals, storage_loc)
                runner.tick(len(_tids))
                _tids = []
                _fvals = []
        # Flush whatever remains for this feature
        if len(_tids):
            bs.store(_tids, _fvals, storage_loc)
            runner.tick(len(_tids))
def extract_xfcc(segments, config, is_pattern=False, method_name='mfcc'):
    """Extract cepstral-coefficient features (mfcc/bfcc/lfcc/gfcc or their
    filterbank variants) for a queryset of segments.

    :param segments: Segment queryset
    :param config: dict of extraction options (lower, upper, delta, nfilt, nmfcc, ...)
    :param is_pattern: when True, features are computed on synthetic chirps of
        matching duration/fs instead of the real audio
    :param method_name: one of mfcc/bfcc/lfcc/gfcc/mfc/bfc/lfc/gfc
    :return: np.ndarray of features in the segments' original order
    :raises Exception: for an unknown method_name
    """
    nsegs = len(segments)

    lower = int(config.get('lower', 20))
    upper = int(config.get('upper', 8000))
    ndelta = int(config.get('delta', 0))
    nfilt = int(config.get('nfilt', 26))
    nmfcc = int(config.get('nmfcc', nfilt / 2))
    assert nmfcc <= nfilt
    xtrargs = {'name': method_name, 'lowfreq': lower, 'highfreq': upper, 'numcep': nmfcc, 'nfilt': nfilt}
    if 'cepsfunc' in config:
        xtrargs['cepsfunc'] = config['cepsfunc']

    # Dispatch: *fcc methods use cepstral extraction, *fc use filterbank only;
    # gammatone variants need the extra hearing-range parameters
    if method_name in ['mfcc', 'bfcc', 'lfcc']:
        method = xfcc
    elif method_name == 'gfcc':
        lowhear = int(config.get('lowhear', 500))
        hihear = int(config.get('hihear', 12000))
        xtrargs['lowhear'] = lowhear
        xtrargs['hihear'] = hihear
        method = xfcc
    elif method_name in ['mfc', 'bfc', 'lfc']:
        method = xfc
    elif method_name == 'gfc':
        lowhear = int(config.get('lowhear', 500))
        hihear = int(config.get('hihear', 12000))
        xtrargs['lowhear'] = lowhear
        xtrargs['hihear'] = hihear
        method = xfc
    else:
        raise Exception('No such method: {}'.format(method_name))

    lower = xtrargs['lowfreq']
    upper = xtrargs['highfreq']
    nmfcc = xtrargs['numcep']
    bar = Bar('Extracting {} Range={}~{}, nCoefs={}, delta={}'.format(method_name, lower, upper, nmfcc, ndelta),
              max=nsegs, suffix='%(index)d/%(max)d %(elapsed)ds/%(eta)ds')

    if is_pattern:
        cache = {}
        original_segment_ids = np.array(segments.values_list('id', flat=True), dtype=np.int32)

        # Sort by duration so that we can cache them effectively
        segments = segments.annotate(duration=F('end_time_ms') - F('start_time_ms')).order_by('duration')
        duration_sorted_segment_ids = np.array(segments.values_list('id', flat=True), dtype=np.int32)

        # We need the index array in order to restore the original order:
        ascending_sorted_idx = np.sort(original_segment_ids)
        ascending_sorted_to_original_order = np.searchsorted(ascending_sorted_idx, original_segment_ids)
        duration_sorted_to_ascending_sorted_order = np.argsort(duration_sorted_segment_ids)
        duration_sorted_to_original_order = \
            duration_sorted_to_ascending_sorted_order[ascending_sorted_to_original_order]

        sorted_mfcc = []
        segments_info = segments.values_list('duration', 'audio_file__fs')
        for duration, fs in segments_info:
            if duration not in cache:
                # Segments are processed in duration order, so a previous
                # duration never recurs; resetting the whole cache here keeps
                # memory bounded to one duration's worth of chirps/features
                cache = {duration: {}}
            if fs not in cache[duration]:
                chirps = []
                for amp_profile_name in amp_profile_names:
                    for f0_profile_name in f0_profile_names:
                        chirp = generate_chirp(f0_profile_name, amp_profile_name, duration, fs)
                        chirps.append(chirp)
                cache[duration][fs] = chirps

            if 'ft' not in cache[duration]:
                # NOTE(review): features are cached per duration only -- if two
                # same-duration segments differ in fs, the second reuses the
                # first fs's features. Confirm whether mixed fs can occur here.
                chirps = cache[duration][fs]
                mfcc_fts = []
                for chirp in chirps:
                    mfcc_ft = _extract_xfcc(chirp, fs, method, xtrargs, ndelta)
                    mfcc_fts.append(mfcc_ft)
                cache[duration]['ft'] = mfcc_fts
            else:
                mfcc_fts = cache[duration]['ft']

            sorted_mfcc.append(mfcc_fts)
            bar.next()
        mfccs = np.array(sorted_mfcc)[duration_sorted_to_original_order]
    else:
        mfccs = []
        segment_data = {}
        for segment in segments:
            fs = segment.audio_file.fs
            file_url = wav_path(segment.audio_file)
            sig = wavfile.read_segment(file_url, segment.start_time_ms, segment.end_time_ms, mono=True)
            mfcc_fts = _extract_xfcc(sig, fs, method, xtrargs, ndelta)

            # BUGFIX: was 's' + str(id) -- that stringified the BUILTIN id
            # function, so every segment collided on one dict key; use the
            # segment's actual id so each segment gets its own entry
            segment_data['s' + str(segment.id)] = dict(sig=sig, fs=fs, ft=mfcc_fts)

            mfccs.append(mfcc_fts)
            bar.next()
        mfccs = np.array(mfccs)
        import scipy.io as sio
        sio.savemat('/tmp/segment_data.mat', segment_data)
    bar.finish()
    return mfccs
def extract_spectrogram():
    """
    Extract raw spectrograms for all segments (not the masked spectrogram from Luscinia),
    saving one PNG per song and one per syllable that is missing on disk.
    :return: None (writes images as a side effect)
    """
    # Group segment (id, start_ms, end_ms) triplets by audio file
    audio_to_segs = {}
    for segment in Segment.objects.all():
        audio_file = segment.audio_file
        if audio_file not in audio_to_segs:
            audio_to_segs[audio_file] = [(segment.id, segment.start_time_ms, segment.end_time_ms)]
        else:
            audio_to_segs[audio_file].append((segment.id, segment.start_time_ms, segment.end_time_ms))

    n = len(audio_to_segs)
    bar = Bar('Exporting spects ...', max=n)

    for audio_file, seg_list in audio_to_segs.items():
        # Skip this file entirely if every syllable image already exists
        count = 0
        for seg_id, start, end in seg_list:
            seg_spect_path = spect_fft_path(seg_id, 'syllable')
            if os.path.isfile(seg_spect_path):
                count += 1
        if count == len(seg_list):
            bar.next()
            continue

        filepath = wav_path(audio_file)
        fs, sig = wav_2_mono(filepath)
        duration_ms = len(sig) * 1000 / fs

        # One STFT over the whole song; syllables are cropped from it below
        _, _, s = signal.stft(sig, fs=fs, window=window, noverlap=noverlap,
                              nfft=window_size, return_onesided=True)
        file_spect = np.abs(s * scale)
        height, width = np.shape(file_spect)
        file_spect = np.flipud(file_spect)

        try:
            # Quantise log-magnitudes into 64 colourmap bins
            file_spect = np.log10(file_spect)
            file_spect = ((file_spect - global_min_spect_pixel) / interval64)
            file_spect[np.isinf(file_spect)] = 0
            # BUGFIX: np.int was removed in NumPy 1.24 -- it was an alias of
            # the builtin int, so astype(int) preserves the original behaviour
            file_spect = file_spect.astype(int)

            file_spect = file_spect.reshape((width * height, ), order='C')
            file_spect[file_spect >= 64] = 63
            file_spect_rgb = np.empty((height, width, 3), dtype=np.uint8)
            file_spect_rgb[:, :, 0] = cm_red[file_spect].reshape((height, width)) * 255
            file_spect_rgb[:, :, 1] = cm_green[file_spect].reshape((height, width)) * 255
            file_spect_rgb[:, :, 2] = cm_blue[file_spect].reshape((height, width)) * 255

            file_spect_img = Image.fromarray(file_spect_rgb)
            file_spect_path = spect_fft_path(audio_file.id, 'song')
            ensure_parent_folder_exists(file_spect_path)
            if not os.path.isfile(file_spect_path):
                file_spect_img.save(file_spect_path, format='PNG')

            for seg_id, start, end in seg_list:
                # Map the syllable's [start, end] ms range onto spectrogram columns
                roi_start = int(start / duration_ms * width)
                roi_end = int(np.ceil(end / duration_ms * width))
                seg_spect_rgb = file_spect_rgb[:, roi_start:roi_end, :]
                seg_spect_img = Image.fromarray(seg_spect_rgb)
                seg_spect_path = spect_fft_path(seg_id, 'syllable')
                ensure_parent_folder_exists(seg_spect_path)
                if not os.path.isfile(seg_spect_path):
                    seg_spect_img.save(seg_spect_path, format='PNG')
        except Exception as e:
            warning('Error occured at song id: {}'.format(audio_file.id))
            raise e
        bar.next()
    bar.finish()
def handle(self, *args, **options):
    """Export labelled segments of a database as audio/spectrogram files plus a
    metadata.tsv (id, filename, label, label_enum, fold) for model training,
    optionally normalising the exported spectrograms afterwards.
    """
    database_name = options['database_name']
    annotator_name = options['annotator_name']
    label_level = options['label_level']
    save_to = options['save_to']
    format = options['format']
    min_occur = options['min_occur']
    num_instances = options['num_instances']
    normalised = options['normalised']

    if num_instances is not None:
        assert num_instances >= min_occur, 'num_instances must be >= min_occur'

    database = get_or_error(Database, dict(name__iexact=database_name))
    annotator = get_or_error(User, dict(username__iexact=annotator_name))
    segments = Segment.objects.filter(audio_file__database=database)
    sids = np.array(list(segments.order_by('id').values_list('id', flat=True)))

    # Fetch labels; drop segments without one, then optionally cap instances per label
    labels, no_label_ids = get_labels_by_sids(sids, label_level, annotator, min_occur)
    if len(no_label_ids) > 0:
        sids, _, labels = exclude_no_labels(sids, None, labels, no_label_ids)
    if num_instances:
        sids, _, labels = select_instances(sids, None, labels, num_instances)

    # Enumerate labels and assign a cross-validation fold to every segment
    unique_labels, enum_labels = np.unique(labels, return_inverse=True)
    fold_indices = get_kfold_indices(enum_labels, min_occur)
    segments_info = {sid: (label, label_enum, fold_ind)
                     for sid, label, label_enum, fold_ind
                     in zip(sids, labels, enum_labels, fold_indices)}

    segs = Segment.objects.filter(id__in=sids)

    # Group (id, start_ms, end_ms) triplets by audio file so each wav is read once
    audio_file_dict = {}
    for seg in segs:
        af = seg.audio_file
        if af in audio_file_dict:
            info = audio_file_dict[af]
        else:
            info = []
            audio_file_dict[af] = info
        info.append((seg.id, seg.start_time_ms, seg.end_time_ms))

    audio_info = []
    bar = Bar('Exporting segments ...', max=len(segs))
    metadata_file_path = os.path.join(save_to, 'metadata.tsv')
    # extractor converts to a spectrogram format; None means export raw audio
    extractor = extractors.get(format, None)
    for af, info in audio_file_dict.items():
        wav_file_path = wav_path(af)
        fullwav = pydub.AudioSegment.from_wav(wav_file_path)

        for id, start, end in info:
            label, label_enum, fold_ind = segments_info[id]
            audio_segment = fullwav[start: end]

            filename = '{}.{}'.format(id, format)
            filepath = os.path.join(save_to, filename)
            ensure_parent_folder_exists(filepath)

            if not os.path.isfile(filepath):
                if extractor is not None:
                    extractor(wav_file_path, af.fs, start, end, filepath)
                else:
                    with open(filepath, 'wb') as f:
                        audio_segment.export(f, format=format)
            audio_info.append((id, filename, label, label_enum, fold_ind))
            bar.next()

    # Write the training metadata index
    with open(metadata_file_path, 'w') as f:
        f.write('id\tfilename\tlabel\tlabel_enum\tfold\n')
        for id, filename, label, label_enum, fold_ind in audio_info:
            f.write('{}\t{}\t{}\t{}\t{}\n'.format(id, filename, label, label_enum, fold_ind))
    bar.finish()

    # Optionally produce a normalised copy of everything just exported
    if normalised:
        norm_folder = os.path.join(save_to, 'normalised')
        mkdirp(norm_folder)
        global_min, global_max = extract_global_min_max(save_to, format)
        save_global_min_max(norm_folder, global_min, global_max)
        normalise_all(save_to, norm_folder, format, global_min, global_max)