def delete_segments_async():
    """
    Permanently delete every segment flagged as inactive, together with its extra attribute values and,
    when no surviving segment still needs it, its syllable spectrogram file.

    Spectrogram files are stored per TID and one TID may be shared by several segments. A file is therefore
    removed only when every segment found for that TID is part of this deletion batch.

    :return: None
    """
    segments = Segment.fobjs.filter(active=False)
    this_vl = list(segments.values_list('id', 'tid'))
    this_sids = [seg_id for seg_id, _ in this_vl]
    this_tids = [tid for _, tid in this_vl]
    doomed_sids = set(this_sids)

    # Collect, for each affected TID, the IDs of all segments that reference it.
    # NOTE(review): this goes through `Segment.objects` while the deletion set comes from `Segment.fobjs`;
    # whether `objects` also returns inactive rows is not visible here. The subset test below is correct
    # either way: it deletes only when no segment outside the batch uses the TID.
    tid2ids = {tid: [] for tid in this_tids}
    for seg_id, tid in Segment.objects.filter(tid__in=this_tids).values_list('id', 'tid'):
        tid2ids[tid].append(seg_id)

    path_template = spect_fft_path('{}', 'syllable')

    for tid, seg_ids in tid2ids.items():
        # The previous `len(ids) == 1` test kept the file whenever two or more segments sharing a TID were
        # deleted together, leaving an orphaned spectrogram on disk.
        if doomed_sids.issuperset(seg_ids):
            spect_path = path_template.format(tid)
            if os.path.isfile(spect_path):
                os.remove(spect_path)

    ExtraAttrValue.objects.filter(attr__klass=Segment.__name__, owner_id__in=this_sids).delete()
    segments.delete()
def compress_data(database):
    """
    Bundle the spectrogram, mask and compressed-audio files of one database into ``user_data.tar.gz``
    (created in the current working directory).

    For every segment of the database both the syllable spectrogram and the mask are added; a missing
    segment file makes ``tar.add`` raise, exactly as before. Compressed audio files are added only when
    they exist on disk.

    :param database: the database whose segments and audio files are archived
    :return: None
    """
    import tarfile

    segments_ids = Segment.objects.filter(audio_file__database=database).values_list('id', flat=True)
    audio_files = AudioFile.objects.filter(database=database).values_list('name', flat=True)

    bar = Bar('Zipping ...', max=len(segments_ids) + len(audio_files))

    # The context manager guarantees the archive handle is released even if one of the adds fails;
    # previously an exception left the tar file open.
    with tarfile.open("user_data.tar.gz", "w:gz") as tar:
        for seg_id in segments_ids:
            seg_spect_path = spect_fft_path(seg_id, 'syllable')
            seg_mask_path = spect_mask_path('{}'.format(seg_id))
            tar.add(seg_mask_path)
            tar.add(seg_spect_path)
            bar.next()

        for name in audio_files:
            compressed_path = audio_path(name, settings.AUDIO_COMPRESSED_FORMAT)
            if os.path.isfile(compressed_path):
                tar.add(compressed_path)
            bar.next()

    bar.finish()
def change_spectrogram_to_use_tid(apps, schema_editor):
    """
    Data migration: re-key syllable spectrogram files from segment ID to TID.

    Until now each spectrogram was stored under its syllable ID, so copying a syllable produced a second,
    identical image - wasteful, and it slowed copying down considerably. A copy shares its TID with the
    original until the copy is changed, so storing by TID lets both use one file. This migration keeps the
    original's image (renamed to the TID) and removes the identical copies.

    The work is done in three passes:
      1. copy each original's image to ``<tid>-bak`` and collect the duplicates' paths,
      2. delete the collected duplicates,
      3. rename every ``<tid>-bak`` to ``<tid>``.
    NOTE(review): the intermediate ``-bak`` name presumably prevents a file written under a TID from
    clobbering a not-yet-processed file stored under an equal segment ID - confirm.

    :param apps: historical app registry supplied by the migration framework
    :param schema_editor: schema editor; only its connection alias is used
    :return: None
    """
    alias = schema_editor.connection.alias
    segment_model = apps.get_model('koe', 'Segment')

    id_tid_pairs = segment_model.objects.using(alias).all().values_list('id', 'tid')
    all_tids = segment_model.objects.using(alias).all().values_list('tid', flat=True)

    ids_by_tid = {tid: [] for tid in all_tids}
    duplicate_paths = []

    path_template = spect_fft_path('{}', 'syllable')
    bak_path_template = spect_fft_path('{}-bak', 'syllable')

    for seg_id, tid in id_tid_pairs:
        ids_by_tid[tid].append(seg_id)

    # Pass 1: back up the originals, remember the duplicates
    for tid, seg_ids in ids_by_tid.items():
        # The original syllable is always the one with smallest ID
        original_id = min(seg_ids)

        for seg_id in seg_ids:
            sid_path = path_template.format(seg_id)
            file_exists = os.path.isfile(sid_path)

            if seg_id != original_id:
                if file_exists:
                    duplicate_paths.append(sid_path)
                continue

            if file_exists:
                copyfile(sid_path, bak_path_template.format(tid))
            else:
                warning('File {} is missing'.format(sid_path))

    # Pass 2: drop the images that belonged to copies
    for stale_path in duplicate_paths:
        os.remove(stale_path)

    # Pass 3: promote each backup to its final, TID-based name
    for tid in ids_by_tid:
        tid_path_bak = bak_path_template.format(tid)
        if os.path.isfile(tid_path_bak):
            os.rename(tid_path_bak, path_template.format(tid))
def extract_spectrogram(audio_file_id):
    """
    Extract raw spectrograms for all segments (not the masked spectrogram from Luscinia) of an audio file.

    Only segments whose syllable spectrogram (keyed by TID) is not yet on disk are rendered. When every
    image already exists the audio file is not read at all.

    :param audio_file_id: ID of the AudioFile whose segments need spectrogram images
    :return: None
    """
    audio_file = AudioFile.objects.get(id=audio_file_id)
    segs_info = Segment.objects.filter(audio_file=audio_file).values_list('tid', 'start_time_ms', 'end_time_ms')

    missing_segs_info = []
    for tid, start, end in segs_info:
        seg_spect_path = spect_fft_path(tid, 'syllable')
        ensure_parent_folder_exists(seg_spect_path)
        if not os.path.isfile(seg_spect_path):
            missing_segs_info.append((seg_spect_path, start, end))

    if not missing_segs_info:
        return

    filepath = wav_path(audio_file)
    fs, sig = wav_2_mono(filepath)
    duration_ms = len(sig) * 1000 / fs

    _, _, s = signal.stft(sig, fs=fs, window=window, noverlap=noverlap, nfft=window_size, return_onesided=True)
    file_spect = np.abs(s * scale)

    height, width = np.shape(file_spect)
    file_spect = np.flipud(file_spect)

    # Map log-magnitude onto colour-map indices; indices of 64 and above are clamped to 63
    file_spect = np.log10(file_spect)
    file_spect = ((file_spect - global_min_spect_pixel) / interval64)
    # log10(0) yields -inf; send those pixels to index 0
    file_spect[np.isinf(file_spect)] = 0
    # `np.int` was only an alias of the builtin and no longer exists in NumPy >= 1.24
    file_spect = file_spect.astype(int)

    file_spect = file_spect.reshape((width * height, ), order='C')
    file_spect[file_spect >= 64] = 63

    file_spect_rgb = np.empty((height, width, 3), dtype=np.uint8)
    file_spect_rgb[:, :, 0] = cm_red[file_spect].reshape((height, width)) * 255
    file_spect_rgb[:, :, 1] = cm_green[file_spect].reshape((height, width)) * 255
    file_spect_rgb[:, :, 2] = cm_blue[file_spect].reshape((height, width)) * 255

    for path, start, end in missing_segs_info:
        # Convert the segment's time span into a column range of the whole-file spectrogram
        roi_start = int(start / duration_ms * width)
        roi_end = int(np.ceil(end / duration_ms * width))

        seg_spect_rgb = file_spect_rgb[:, roi_start:roi_end, :]
        seg_spect_img = Image.fromarray(seg_spect_rgb)
        seg_spect_img.save(path, format='PNG')
def extract_spectrogram():
    """
    Extract raw spectrograms for all segments (not the masked spectrogram from Luscinia).

    Segments are grouped by audio file. For each file that still lacks at least one syllable image, the
    whole-file spectrogram is computed once, saved as the 'song' image if absent, and then cropped per
    segment. Existing image files are never overwritten.

    :return: None
    :raises Exception: any error raised while rendering a file is logged with the song ID and re-raised
    """
    audio_to_segs = {}
    # select_related fetches the audio file in the same query instead of one extra query per segment
    for segment in Segment.objects.select_related('audio_file'):
        audio_to_segs.setdefault(segment.audio_file, []).append(
            (segment.id, segment.start_time_ms, segment.end_time_ms))

    bar = Bar('Exporting spects ...', max=len(audio_to_segs))

    for audio_file, seg_list in audio_to_segs.items():
        # Nothing to do for this file when every syllable image is already on disk
        if all(os.path.isfile(spect_fft_path(seg_id, 'syllable')) for seg_id, _, _ in seg_list):
            bar.next()
            continue

        filepath = wav_path(audio_file)
        fs, sig = wav_2_mono(filepath)
        duration_ms = len(sig) * 1000 / fs

        _, _, s = signal.stft(sig, fs=fs, window=window, noverlap=noverlap, nfft=window_size, return_onesided=True)
        file_spect = np.abs(s * scale)

        height, width = np.shape(file_spect)
        file_spect = np.flipud(file_spect)

        try:
            # Map log-magnitude onto colour-map indices; indices of 64 and above are clamped to 63
            file_spect = np.log10(file_spect)
            file_spect = ((file_spect - global_min_spect_pixel) / interval64)
            # log10(0) yields -inf; send those pixels to index 0
            file_spect[np.isinf(file_spect)] = 0
            # `np.int` was only an alias of the builtin and no longer exists in NumPy >= 1.24
            file_spect = file_spect.astype(int)

            file_spect = file_spect.reshape((width * height, ), order='C')
            file_spect[file_spect >= 64] = 63

            file_spect_rgb = np.empty((height, width, 3), dtype=np.uint8)
            file_spect_rgb[:, :, 0] = cm_red[file_spect].reshape((height, width)) * 255
            file_spect_rgb[:, :, 1] = cm_green[file_spect].reshape((height, width)) * 255
            file_spect_rgb[:, :, 2] = cm_blue[file_spect].reshape((height, width)) * 255

            file_spect_path = spect_fft_path(audio_file.id, 'song')
            ensure_parent_folder_exists(file_spect_path)
            if not os.path.isfile(file_spect_path):
                Image.fromarray(file_spect_rgb).save(file_spect_path, format='PNG')

            for seg_id, start, end in seg_list:
                seg_spect_path = spect_fft_path(seg_id, 'syllable')
                ensure_parent_folder_exists(seg_spect_path)
                if os.path.isfile(seg_spect_path):
                    continue

                # Convert the segment's time span into a column range of the whole-file spectrogram
                roi_start = int(start / duration_ms * width)
                roi_end = int(np.ceil(end / duration_ms * width))

                seg_spect_rgb = file_spect_rgb[:, roi_start:roi_end, :]
                Image.fromarray(seg_spect_rgb).save(seg_spect_path, format='PNG')

        except Exception:
            warning('Error occured at song id: {}'.format(audio_file.id))
            # Bare raise keeps the original traceback intact
            raise

        bar.next()
    bar.finish()