예제 #1
0
    def handle(self, *args, **options):
        """Smoke-test that every stored Segment's audio can be read from disk.

        Iterates over all AudioFile records and calls ``read_segment`` on each
        segment's boundaries, discarding the result — only the read itself is
        exercised. Progress is shown with a ``Bar``.

        NOTE(review): ``already_tested`` is not defined anywhere in this
        snippet — presumably a module-level resume counter; confirm it exists
        at module scope.
        """
        # --- Historical one-off kept for reference: rescaled all segment
        # boundaries after a sample-rate correction. ---
        # audio_file_path = '/Users/yfukuzaw/workspace/koe/user_data/audio/wav/52/201911142.wav'
        # begin_ms = 234
        # end_ms = 544
        # fs, length = get_wav_info(audio_file_path)
        # read_segment(audio_file_path, beg_ms=begin_ms, end_ms=end_ms, mono=True, normalised=True)

        # audio_files = AudioFile.objects.filter(fs__gt=320000)
        #
        # for audio_file in audio_files:
        #     segments = Segment.objects.filter(audio_file=audio_file)
        #     ratio = 48000 / audio_file.fs
        #     duration_ms = int(audio_file.length * 1000 /audio_file.fs)
        #     for segment in segments:
        #         beg_ms = segment.start_time_ms
        #         end_ms = segment.end_time_ms
        #
        #         new_beg = max(0, int(np.round(beg_ms * ratio)))
        #         new_end = min(int(np.round(end_ms * ratio)), duration_ms)
        #
        #         segment.start_time_ms = new_beg
        #         segment.end_time_ms = new_end
        #
        #         sid = segment.id
        #
        #         print('Change syllable #{} from [{} - {}] to [{} - {}]'.format(sid, beg_ms, end_ms, new_beg, new_end))
        #
        #         segment.save()

        audio_files = AudioFile.objects.all()
        # Total count only sizes the progress bar; the name is reused below
        # for per-file counts.
        num_segments = Segment.objects.all().count()
        num_tested = 0

        bar = Bar('Testing...', max=num_segments)
        for audio_file in audio_files:
            segments = Segment.objects.filter(audio_file=audio_file)
            num_segments = segments.count()
            # Fast-forward past files fully covered by a previous run.
            if num_tested + num_segments < already_tested:
                num_tested += num_segments
                bar.next(num_segments)
                continue

            audio_file_path = wav_path(audio_file)
            for segment in segments:
                begin_ms = segment.start_time_ms
                end_ms = segment.end_time_ms
                # Missing wav files are silently skipped.
                if os.path.isfile(audio_file_path):
                    read_segment(audio_file_path,
                                 beg_ms=begin_ms,
                                 end_ms=end_ms,
                                 mono=True,
                                 normalised=True)
                bar.next()
                num_tested += 1

        bar.finish()
예제 #2
0
    def setUp(self):
        """Load the example wav and cache two contiguous mono test segments."""
        wav = 'tests/example 1.wav'
        self.fs, _ = wav_2_mono(wav, normalised=False)

        # 200 ms segment starting at 100 ms.
        self.long_segment = np.ascontiguousarray(
            wavfile.read_segment(wav, beg_ms=100, end_ms=300, mono=True))

        # 49 ms segment starting at 100 ms.
        self.short_segment = np.ascontiguousarray(
            wavfile.read_segment(wav, beg_ms=100, end_ms=149, mono=True))
예제 #3
0
파일: audio.py 프로젝트: jren2019/koe
def _cached_get_segment_audio_data(audio_file_name, database_id, fs, start,
                                   end):
    """Extract one audio segment, normalise its loudness, compress it, and
    return it wrapped in an HttpResponse.

    :param audio_file_name: wav file basename (without extension)
    :param database_id: used to locate the per-database wav folder
    :param fs: sample rate of the stored wav data
    :param start: segment start passed through to read_segment
    :param end: segment end passed through to read_segment
    :return: HttpResponse carrying the compressed audio bytes
    """
    wav_file_path = data_path('audio/wav/{}'.format(database_id),
                              '{}.wav'.format(audio_file_name))
    chunk = wavfile.read_segment(wav_file_path, start, end, normalised=False,
                                 mono=True)

    # Wrap the raw samples in a pydub segment so it can be amplitude-matched
    # and exported in the configured compressed format.
    segment = pydub.AudioSegment(chunk.tobytes(),
                                 frame_rate=fs,
                                 sample_width=chunk.dtype.itemsize,
                                 channels=1)
    segment = _match_target_amplitude(segment)

    buffer = io.BytesIO()
    segment.export(buffer, format=settings.AUDIO_COMPRESSED_FORMAT)
    payload = buffer.getvalue()
    buffer.close()

    response = HttpResponse()
    response.write(payload)
    response['Content-Type'] = 'audio/' + settings.AUDIO_COMPRESSED_FORMAT
    response['Content-Length'] = len(payload)
    return response
예제 #4
0
파일: audio.py 프로젝트: jren2019/koe
def change_fs_without_resampling(wav_file, new_fs, new_name):
    """Write a copy of a wav file whose header declares a new (fake) sample
    rate, without touching the actual sample data.

    Necessary when the wav's real rate exceeds the maximum the browser can
    play back.
    :param wav_file: path to the original wav file
    :param new_fs: the sample rate to declare in the new file
    :param new_name: path of the wav file to create
    :return: None (the new file is written to disk)
    """
    (size, comp, num_channels, rate, sbytes, block_align, bitrate,
     _raw_bytes, dtype) = read_wav_info(wav_file)

    # Pull the raw bytes untouched (no normalisation, no retyping).
    raw = read_segment(wav_file, 0, None, normalised=False, retype=False)

    nframes_per_channel = raw.size // block_align
    bytes_per_frame = bitrate // 8
    frame_data = raw.reshape(
        (nframes_per_channel, num_channels, bytes_per_frame)).astype(np.uint8)

    # 24-bit audio needs the dedicated writer; everything else goes through
    # the generic one.
    if bitrate == 24:
        write_24b(new_name, new_fs, frame_data)
    else:
        write(new_name, new_fs, frame_data, bitrate=bitrate)
예제 #5
0
def get_sig(args):
    """Return the band-pass-filtered signal described by *args*.

    The segment is read from disk when ``wav_file_path`` is set, otherwise
    the pre-loaded ``args['sig']`` is used; either way it is passed through
    a Butterworth band-pass filter between ``hpf`` and ``lpf``.
    """
    wav_file_path, fs, start, end, win_length, lpf, hpf = unroll_args(
        args,
        ['wav_file_path', 'fs', 'start', 'end', 'win_length', 'lpf', 'hpf'])

    if wav_file_path:
        raw = wavfile.read_segment(wav_file_path, start, end, mono=True,
                                   normalised=True, winlen=win_length)
    else:
        raw = args['sig']

    return butter_bandpass_filter(raw, lpf, hpf, fs)
예제 #6
0
파일: utils.py 프로젝트: jren2019/koe
def cached_stft(wav_file_path, start, end, nfft, noverlap, win_length,
                window_name, center):
    """Read the [start, end] portion of a wav file and return its STFT."""
    segment, fs = wavfile.read_segment(wav_file_path, start, end,
                                       normalised=True, mono=True,
                                       return_fs=True)
    return stft_from_sig(segment, nfft, noverlap, win_length, window_name,
                         center)
예제 #7
0
파일: model_utils.py 프로젝트: jren2019/koe
def extract_spectrogram(audio_file, segs_info):
    """
    Extract raw spectrograms for all segments (not the masked spectrogram
    from Luscinia) of an audio file and save each as a PNG.
    :param audio_file: database object whose wav is located via wav_path()
    :param segs_info: iterable of (tid, start, end) tuples — start/end are
        passed to read_segment as milliseconds
    :return: None (one PNG per segment is written to disk)
    """
    filepath = wav_path(audio_file)

    # Fail fast if the wav is missing. (Originally the existence check ran
    # *after* get_wav_info had already opened the file, so it could never
    # trigger before a lower-level IO error.)
    if not os.path.isfile(filepath):
        raise CustomAssertionError("File {} not found".format(audio_file.name))

    fs, duration = get_wav_info(filepath)

    for tid, start, end in segs_info:
        seg_spect_path = get_abs_spect_path(tid)
        ensure_parent_folder_exists(seg_spect_path)

        sig = read_segment(filepath,
                           beg_ms=start,
                           end_ms=end,
                           mono=True,
                           normalised=True,
                           return_fs=False,
                           retype=True,
                           winlen=window_size)
        _, _, s = signal.stft(sig,
                              fs=fs,
                              window=window,
                              noverlap=noverlap,
                              nfft=window_size,
                              return_onesided=True)
        spect = np.abs(s * scale)

        height, width = np.shape(spect)
        spect = np.flipud(spect)

        # Map log-magnitude into 64 colour-map bins.
        spect = np.log10(spect)
        spect = ((spect - global_min_spect_pixel) / interval64)
        spect[np.isinf(spect)] = 0
        # np.int was removed in NumPy 1.24; the builtin int is equivalent.
        spect = spect.astype(int)

        spect = spect.reshape((width * height, ), order='C')
        spect[spect >= 64] = 63
        spect_rgb = np.empty((height, width, 3), dtype=np.uint8)
        spect_rgb[:, :, 0] = cm_red[spect].reshape((height, width)) * 255
        spect_rgb[:, :, 1] = cm_green[spect].reshape((height, width)) * 255
        spect_rgb[:, :, 2] = cm_blue[spect].reshape((height, width)) * 255

        seg_spect_img = Image.fromarray(spect_rgb)
        seg_spect_img.save(seg_spect_path, format='PNG')
        celerylogger.info('spectrogram {} created'.format(seg_spect_path))
def get_sig(args):
    """Return the raw (unfiltered) signal described by *args*.

    Reads the segment from disk when ``wav_file_path`` is set; otherwise the
    pre-loaded signal in ``args['sig']`` is returned untouched.
    """
    wav_file_path, fs, start, end = unroll_args(
        args, ['wav_file_path', 'fs', 'start', 'end'])
    if not wav_file_path:
        return args['sig']
    return wavfile.read_segment(wav_file_path, start, end, mono=True,
                                normalised=True)
예제 #9
0
    def test_read_segment(self):
        """read_segment must honour beg_ms/end_ms, and pad to whole frames
        when winlen is given."""
        wav = 'tests/example 1.wav'

        fs, sig = wav_2_mono(wav, normalised=True)
        total_samples = len(sig)
        max_end_ms = int(np.floor(total_samples / fs * 1000))

        # No boundaries -> the whole file comes back.
        whole = wavfile.read_segment(wav, mono=True)
        self.assertEqual(total_samples, len(whole))

        starts = [0, 1, 2, 20, 30, 100]

        # Arbitrary windows must come back with exactly the requested length.
        for beg_ms in starts:
            for length_ms in [1, 100, 150, 153, 200, max_end_ms - beg_ms]:
                piece = wavfile.read_segment(wav,
                                             beg_ms=beg_ms,
                                             end_ms=beg_ms + length_ms,
                                             mono=True)
                piece_len_ms = np.round(len(piece) * 1000 / fs)
                self.assertEqual(piece_len_ms, length_ms)

        framelength = 256

        # With winlen, the result is rounded up to a whole number of frames.
        for beg_ms in starts:
            for length_ms in [1, 100, 150, 153, 200, max_end_ms - beg_ms]:
                expected = int(length_ms * fs / 1000)
                if expected % framelength != 0:
                    expected = np.ceil(expected / framelength) * framelength

                piece = wavfile.read_segment(wav,
                                             beg_ms=beg_ms,
                                             end_ms=beg_ms + length_ms,
                                             mono=True,
                                             winlen=framelength)
                self.assertEqual(expected, len(piece))
예제 #10
0
파일: utils.py 프로젝트: jren2019/koe
def wav_2_mono(file, **kwargs):
    """
    Read a wav file and return fs and the first channel's data stream.
    The data is normalised to be equivalent to Matlab's `audioread(...)`
    function.
    :param file: path to the wav file; extra kwargs go to read_segment
    :return: (fs, signal) tuple
    """
    signal = wavfile.read_segment(file, **kwargs)
    # Multi-channel data: keep only the first channel.
    if np.ndim(signal) > 1:
        signal = signal[:, 0]

    fs, _ = get_wav_info(file)
    return fs, signal
예제 #11
0
    def test_read_segment(self):
        """Segments of various lengths must come back with the right size."""
        wav = 'tests/example 1.wav'

        fs, sig = wav_2_mono(wav, normalised=True)
        total_samples = len(sig)

        # No boundaries -> the full signal.
        whole = wavfile.read_segment(wav, mono=True)
        self.assertEqual(total_samples, len(whole))

        # Each window must come back with exactly the requested duration.
        for beg_ms in [0, 1, 2, 20, 30, 100]:
            for length_ms in [100, 150, 153, 200]:
                piece = wavfile.read_segment(wav,
                                             beg_ms=beg_ms,
                                             end_ms=beg_ms + length_ms,
                                             mono=True)
                piece_len_ms = np.round(len(piece) * 1000 / fs)
                self.assertEqual(piece_len_ms, length_ms)
예제 #12
0
파일: utils.py 프로젝트: jren2019/koe
def get_sig(args):
    """Return the signal for a segment, widening it to at least one
    analysis window.

    If an explicit ``end`` is given and the span is shorter than
    ``win_length``, the end is pushed out so the segment covers a full
    window. Reads from disk when ``wav_file_path`` is set, otherwise
    returns the pre-loaded ``args['sig']``.
    """
    wav_file_path, fs, start, end, win_length = unroll_args(
        args, ['wav_file_path', 'fs', 'start', 'end', 'win_length'])

    if end and end - start < win_length:
        end = start + win_length
    if not wav_file_path:
        return args['sig']
    return wavfile.read_segment(wav_file_path, start, end, mono=True,
                                normalised=True)
예제 #13
0
def extract_mfcc(wav_file_path, fs, start, end, nfft, noverlap, filepath=None):
    """Compute MFCC features for a wav segment.

    :param filepath: if given, the result is pickled to this path and
        nothing is returned; otherwise the features are returned directly.
    """
    sig = wavfile.read_segment(wav_file_path,
                               beg_ms=start,
                               end_ms=end,
                               mono=True)
    # The signal is handed over pre-loaded, so mfcc() skips file IO.
    mfcc_args = {
        'nfft': nfft,
        'noverlap': noverlap,
        'win_length': nfft,
        'fs': fs,
        'wav_file_path': None,
        'start': 0,
        'end': None,
        'sig': sig,
        'center': True,
    }
    value = mfcc(mfcc_args)

    if not filepath:
        return value
    with open(filepath, 'wb') as f:
        pickle.dump(value, f)
예제 #14
0
def extract_xfcc(segments, config, is_pattern=False, method_name='mfcc'):
    """Compute cepstral-coefficient features for a queryset of segments.

    :param segments: Segment queryset
    :param config: dict of feature parameters (lower/upper freq, delta,
        nfilt, nmfcc, optional cepsfunc/lowhear/hihear)
    :param is_pattern: if True, extract features from synthetic chirps
        matching each segment's duration/fs instead of the real audio
    :param method_name: one of mfcc/bfcc/lfcc/gfcc/mfc/bfc/lfc/gfc
    :return: np.ndarray of features in the segments' original order
    :raises Exception: for an unknown method_name
    """
    nsegs = len(segments)

    lower = int(config.get('lower', 20))
    upper = int(config.get('upper', 8000))
    ndelta = int(config.get('delta', 0))
    nfilt = int(config.get('nfilt', 26))
    nmfcc = int(config.get('nmfcc', nfilt / 2))

    assert nmfcc <= nfilt
    xtrargs = {
        'name': method_name,
        'lowfreq': lower,
        'highfreq': upper,
        'numcep': nmfcc,
        'nfilt': nfilt
    }
    if 'cepsfunc' in config:
        xtrargs['cepsfunc'] = config['cepsfunc']

    # gammatone-based methods need extra hearing-range parameters.
    if method_name in ['mfcc', 'bfcc', 'lfcc']:
        method = xfcc
    elif method_name == 'gfcc':
        xtrargs['lowhear'] = int(config.get('lowhear', 500))
        xtrargs['hihear'] = int(config.get('hihear', 12000))
        method = xfcc
    elif method_name in ['mfc', 'bfc', 'lfc']:
        method = xfc
    elif method_name == 'gfc':
        xtrargs['lowhear'] = int(config.get('lowhear', 500))
        xtrargs['hihear'] = int(config.get('hihear', 12000))
        method = xfc
    else:
        raise Exception('No such method: {}'.format(method_name))

    bar = Bar('Extracting {} Range={}~{}, nCoefs={}, delta={}'.format(
        method_name, lower, upper, nmfcc, ndelta),
              max=nsegs,
              suffix='%(index)d/%(max)d %(elapsed)ds/%(eta)ds')

    if is_pattern:
        # Features of a synthetic chirp depend only on (duration, fs), so
        # cache per that pair. (The original reset the whole cache whenever
        # a new duration appeared, and shared 'ft' entries across different
        # fs values of the same duration — both fixed here.)
        cache = {}

        original_segment_ids = np.array(segments.values_list('id', flat=True),
                                        dtype=np.int32)

        # Sort by duration so that we can cache them effectively
        segments = segments.annotate(duration=F('end_time_ms') -
                                     F('start_time_ms')).order_by('duration')
        duration_sorted_segment_ids = np.array(segments.values_list('id',
                                                                    flat=True),
                                               dtype=np.int32)

        # We need the index array in order to restore the original order:
        ascending_sorted_idx = np.sort(original_segment_ids)

        ascending_sorted_to_original_order = np.searchsorted(
            ascending_sorted_idx, original_segment_ids)
        duration_sorted_to_ascending_sorted_order = np.argsort(
            duration_sorted_segment_ids)
        duration_sorted_to_original_order = duration_sorted_to_ascending_sorted_order[
            ascending_sorted_to_original_order]

        sorted_mfcc = []

        segments_info = segments.values_list('duration', 'audio_file__fs')
        for duration, fs in segments_info:
            key = (duration, fs)
            if key not in cache:
                chirps = []
                for amp_profile_name in amp_profile_names:
                    for f0_profile_name in f0_profile_names:
                        chirps.append(
                            generate_chirp(f0_profile_name, amp_profile_name,
                                           duration, fs))
                cache[key] = [
                    _extract_xfcc(chirp, fs, method, xtrargs, ndelta)
                    for chirp in chirps
                ]

            sorted_mfcc.append(cache[key])
            bar.next()
        mfccs = np.array(sorted_mfcc)[duration_sorted_to_original_order]

    else:
        mfccs = []
        segment_data = {}

        for segment in segments:
            fs = segment.audio_file.fs
            file_url = wav_path(segment.audio_file)
            sig = wavfile.read_segment(file_url,
                                       segment.start_time_ms,
                                       segment.end_time_ms,
                                       mono=True)
            mfcc_fts = _extract_xfcc(sig, fs, method, xtrargs, ndelta)

            # Bug fix: the original used the builtin `id` function here,
            # so every entry collapsed onto one key.
            segment_data['s' + str(segment.id)] = dict(sig=sig, fs=fs,
                                                       ft=mfcc_fts)

            mfccs.append(mfcc_fts)
            bar.next()
        mfccs = np.array(mfccs)

        # Debug dump of per-segment data for offline inspection in Matlab.
        import scipy.io as sio
        sio.savemat('/tmp/segment_data.mat', segment_data)

    bar.finish()
    return mfccs