Example #1
def test_source_binauralizer():
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.iolib.position import read_position_file

    # binauralizer = SourceBinauralizer(use_hrtfs=True, cipic_dir='hrtfs/cipic_subj3')
    binauralizer = SourceBinauralizer(use_hrtfs=False)

    # Static source
    sample = 'wav_test/gen_synthetic-S1'
    positions, wav_fns, _, sample_ids = read_position_file(sample+'-position.txt')
    mono, rate = load_wav(wav_fns[sample_ids[0]])
    source = PositionalSource(mono[:, 0], positions[sample_ids[0]][0], rate)
    stereo = binauralizer.binauralize([source])

    save_wav('/tmp/output.wav', stereo / np.abs(stereo).max(), rate)
    os.system('play /tmp/output.wav')
    os.remove('/tmp/output.wav')

    # Moving source
    sample = 'wav_test/gen_synthetic-M1'
    positions, wav_fns, _, sample_ids = read_position_file(sample+'-position.txt')
    mono, rate = load_wav(wav_fns[sample_ids[0]])
    source = MovingSource(mono[:, 0], positions[sample_ids[0]], rate)
    stereo = np.zeros((mono.shape[0], 2))
    while source.tic():
        binauralizer.binauralize_frame([source], stereo, source.cur_idx)

    save_wav('/tmp/output.wav', stereo / np.abs(stereo).max(), rate)
    os.system('play /tmp/output.wav')
    os.remove('/tmp/output.wav')
Example #2
def test_ambix_emd():
    from pyutils.iolib.audio import load_wav
    # Run from project home (spatialaudiogen/)

    # Load ambisonics
    rate = 24000
    ambi1, _ = load_wav('data/wav_test/hello-left2right-ambix.wav', rate=rate)
    ambi2, _ = load_wav('data/wav_test/hello-statright-ambix.wav', rate=rate)
    print('Same FOA: EMD =', ambix_emd(ambi1, ambi1, rate))
    print('Diff FOA: EMD =', ambix_emd(ambi1, ambi2, rate))
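ambix_emd presumably compares the two recordings by the earth mover's distance between their directional energy maps on the sphere (the same kind of RMS maps SphericalAmbisonicsVisualizer produces in Example #15). As a rough illustration of what such a map is, here is a minimal sketch that projects a first-order ambiX signal onto a grid of directions and takes per-direction RMS; it assumes ACN channel order (W, Y, Z, X) with SN3D normalization and is not necessarily what the library computes internally:

import numpy as np

def foa_energy_map(ambix, angular_res=20.):
    # Directional RMS map of a first-order ambiX signal: project the sound
    # field onto a grid of directions and measure the energy per direction.
    phi = np.radians(np.arange(-180., 180., angular_res))             # azimuth grid
    nu = np.radians(np.arange(-90., 90. + angular_res, angular_res))  # elevation grid
    phi_mesh, nu_mesh = np.meshgrid(phi, nu)

    # First-order (SN3D) spherical-harmonic gains for every grid direction,
    # in ACN order (W, Y, Z, X).
    sph = np.stack([np.ones_like(phi_mesh),
                    np.sin(phi_mesh) * np.cos(nu_mesh),
                    np.sin(nu_mesh),
                    np.cos(phi_mesh) * np.cos(nu_mesh)], axis=-1)

    proj = np.tensordot(sph, ambix, axes=([-1], [1]))   # (elev, azim, time)
    return np.sqrt((proj ** 2).mean(axis=-1))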
Example #3
def run(input_fn, position_fn, ambi_order, output_fn):
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        warnings.warn(
            'Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    fmt = AmbiFormat(ambi_order=ambi_order, sample_rate=rate)
    encoder = AmbiEncoder(fmt)
    positions = [
        np.array([float(num) for num in l.strip().split()])
        for l in open(position_fn, 'r')
    ]
    positions = [Position(p[0], p[1], p[2], 'polar') for p in positions]

    if len(positions) == 1:
        # Stationary source
        source = PositionalSource(mono, positions[0], rate)
        ambi = encoder.encode(source)

    else:
        source = MovingSource(mono, positions, rate)
        ambi = AmbisonicArray(np.zeros((mono.shape[0], fmt.num_channels)), fmt)
        while source.tic():
            encoder.encode_frame(source, ambi, source.cur_idx)

    binauralizer = DirectAmbisonicBinauralizer(fmt, method='projection')
    # binauralizer = AmbisonicBinauralizer(fmt, method='projection', use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    stereo = binauralizer.binauralize(ambi.data)
    save_wav(output_fn, stereo, rate)
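In its simplest form, a 'projection' decode projects the sound field onto a set of virtual loudspeakers. For two speakers at azimuth +/-90 degrees under ambiX conventions (ACN order W, Y, Z, X, SN3D) this collapses to L = W + Y and R = W - Y, which is also the shortcut the binauralize branch of Example #16 uses. The sketch below only illustrates that idea; DirectAmbisonicBinauralizer may use a different speaker layout and HRTF handling:

import numpy as np

def foa_projection_stereo(ambix):
    # Project a first-order ambiX signal (ACN order W, Y, Z, X) onto two
    # virtual loudspeakers at azimuth +90 (left) and -90 (right) degrees.
    left = ambix[:, 0] + ambix[:, 1]
    right = ambix[:, 0] - ambix[:, 1]
    return np.stack([left, right], axis=1)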
Example #4
def run(input_fn, output_fn, position_fn='', angular_res=''):
    data, rate = load_wav(input_fn)
    duration = data.shape[0] / float(rate)

    ambiVis = SphericalAmbisonicsVisualizer(data, rate, angular_res=angular_res)
    if position_fn:
        srcVis = SphericalSourceVisualizer(position_fn, duration, ambiVis.visualization_rate(), angular_res=angular_res)

    writer = VideoWriter(output_fn,
                         video_fps=ambiVis.visualization_rate(),
                         width=ambiVis.frame_dims[1],
                         height=ambiVis.frame_dims[0],
                         rgb=True)

    cmap = np.stack(plt.get_cmap('inferno').colors)
    while True:
        frame = ambiVis.get_next_frame()
        if frame is None:
            break
        frame /= frame.max()

        # Super-impose gt position
        if position_fn:
            frame += srcVis.get_next_frame()

        # Process frame and write to disk
        frame = ((frame / frame.max()) * 255).astype(np.uint8)
        frame = (cmap[frame] * 255).astype(np.uint8)    # Add colormap
        writer.write_frame(frame)
Example #5
def test_virtual_mic():
    from pyutils.iolib.audio import load_wav, save_wav
    mic = VirtualStereoMic()
    mono, rate = load_wav('wav_test/piano.wav')
    mono = mono[:, 0]

    positions = [[float(num) for num in l.strip().split()] for l in open('wav_test/piano_stat_position.txt', 'r')]
    positions = [Position(p[0], p[1], p[2], 'polar') for p in positions]
    source = PositionalSource(mono, positions[0], rate)

    stereo = mic.binauralize([source])
    save_wav('/tmp/output.wav', stereo, rate)
    os.system('play /tmp/output.wav')
    os.remove('/tmp/output.wav')

    positions = [[float(num) for num in l.strip().split()] for l in open('wav_test/piano_mov_position.txt', 'r')]
    positions = [Position(p[0], p[1], p[2], 'polar') for p in positions]
    source = MovingSource(mono, positions, rate)

    stereo = np.zeros((mono.shape[0], 2))
    while source.tic():
        mic.binauralize_frame([source], stereo, source.cur_idx)
    save_wav('/tmp/output.wav', stereo, rate)
    os.system('play /tmp/output.wav')
    os.remove('/tmp/output.wav')
Example #6
    def __init__(self, dirname):
        elevation = np.array([
            -45, -39, -34, -28, -23, -17, -11, -6, 0, 6, 11, 17, 23, 28, 34,
            39, 45, 51, 56, 62, 68, 73, 79, 84, 90, 96, 101, 107, 113, 118,
            124, 129, 135, 141, 146, 152, 158, 163, 169, 174, 180, 186, 191,
            197, 203, 208, 214, 219, 225, 231
        ])
        azimuth = np.array([
            -80, -65, -55, -45, -35, -30, -25, -20, -15, -10, -5, 0, 5, 10, 15,
            20, 25, 30, 35, 45, 55, 65, 80
        ])

        self.right_hrir = np.zeros((200, len(azimuth), len(elevation)))
        self.left_hrir = np.zeros((200, len(azimuth), len(elevation)))
        for i, phi in enumerate(azimuth):
            right_fn = ('neg' if phi < 0 else '') + str(
                abs(phi)) + 'azright.wav'
            left_fn = ('neg' if phi < 0 else '') + str(abs(phi)) + 'azleft.wav'
            self.right_hrir[:, i, :] = np.flip(load_wav(
                os.path.join(dirname, right_fn))[0],
                                               axis=0)
            self.left_hrir[:, i, :] = np.flip(load_wav(
                os.path.join(dirname, left_fn))[0],
                                              axis=0)

        radius = 3.
        self.hrir_db = []
        for i, az in enumerate(azimuth):
            for j, elev in enumerate(elevation):
                xp = radius * cos(elev * pi / 180.) * sin(az * pi / 180.)
                yp = radius * cos(elev * pi / 180.) * cos(az * pi / 180.)
                zp = radius * sin(elev * pi / 180.)
                x, y, z = yp, -xp, zp
                # x, y, z = xp, yp, zp
                p = Position(x, y, z, 'cartesian')
                self.hrir_db.append(
                    (p, self.left_hrir[:, i, j], self.right_hrir[:, i, j]))

        self.kdt = KDTree(np.array([
            hrir[0].coords('cartesian') /
            np.linalg.norm(hrir[0].coords('cartesian'))
            for hrir in self.hrir_db
        ]),
                          leaf_size=2,
                          metric='euclidean')
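One plausible way to consume this database is to query the KDTree with a source's unit direction vector and convolve the mono signal with the returned left/right impulse responses. A minimal sketch under that assumption; render_with_nearest_hrir is a hypothetical helper, not a method of the class above:

import numpy as np
from scipy.signal import fftconvolve

def render_with_nearest_hrir(hrir_db, kdt, mono, position):
    # Nearest-neighbour lookup on the unit sphere of measured HRIR directions.
    direction = np.asarray(position.coords('cartesian'), dtype=float)
    direction /= np.linalg.norm(direction)
    _, idx = kdt.query(direction.reshape(1, -1), k=1)
    _, left_hrir, right_hrir = hrir_db[idx[0, 0]]

    # Convolve the source with each ear's impulse response and trim to length.
    left = fftconvolve(mono, left_hrir)[:mono.shape[0]]
    right = fftconvolve(mono, right_hrir)[:mono.shape[0]]
    return np.stack([left, right], axis=1)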
Example #7
def run(input_fn, x, y, z, ambi_order, output_fn):
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    encoder = AmbiEncoder(AmbiFormat(ambi_order=ambi_order, sample_rate=rate))
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    ambi = encoder.encode(source)
    save_wav(output_fn, ambi.data, rate)
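For reference, encoding a point source at first order boils down to weighting the mono waveform by the real spherical harmonics of its direction. The sketch below spells that out under ambiX assumptions (ACN channel order W, Y, Z, X, SN3D normalization, angles in radians); it is not necessarily how AmbiEncoder implements it:

import numpy as np

def encode_foa_ambix(mono, azimuth, elevation):
    gains = np.array([1.0,                                   # W: omnidirectional
                      np.sin(azimuth) * np.cos(elevation),   # Y: left-right
                      np.sin(elevation),                     # Z: up-down
                      np.cos(azimuth) * np.cos(elevation)])  # X: front-back
    return mono[:, np.newaxis] * gains[np.newaxis, :]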
Example #8
def test_moving_source():
    from pyutils.iolib.audio import load_wav
    mono, rate = load_wav('wav_test/piano.wav')
    mono = mono[:, 0]

    position_fn = 'wav_test/piano_mov_position.txt'
    positions = [np.array([float(num) for num in l.strip().split()]) for l in open(position_fn, 'r')]
    positions = [Position(p[0], p[1], p[2], 'polar') for p in positions]

    source = MovingSource(mono, positions, rate)
    while source.tic():
        source.position.print_position('polar')
Example #9
def run(input_fn, x, y, z, output_fn, use_hrtfs, hrtf_dir):

    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        warnings.warn(
            'Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    binauralizer = SourceBinauralizer(use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    stereo = binauralizer.binauralize(source)
    save_wav(output_fn, stereo, rate)
Example #10
def run(input_fn, output_fn, overwrite=False):
    if overwrite and os.path.exists(output_fn):
        os.remove(output_fn)
    assert not os.path.exists(output_fn)

    data, rate = load_wav(input_fn)
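    # An order-N ambisonics signal has (N + 1)**2 channels, so N = sqrt(channels) - 1.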
    ambi_order = int(np.sqrt(data.shape[1]) - 1)

    fmt = AmbiFormat(ambi_order=ambi_order, sample_rate=rate)
    binauralizer = DirectAmbisonicBinauralizer(fmt, method='pseudoinv')
    # binauralizer = AmbisonicBinauralizer(fmt, method='projection', use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    stereo = binauralizer.binauralize(data)
    save_wav(output_fn, stereo, rate)
Example #11
def run(input_fn, x, y, z, ambi_order, output_fn):
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    encoder = AmbiEncoder(AmbiFormat(ambi_order=ambi_order, sample_rate=rate))
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    ambi = encoder.encode(source)

    binauralizer = DirectAmbisonicBinauralizer(ambi.format, method='projection')
    # binauralizer = AmbisonicBinauralizer(ambi.format, method='projection', use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    stereo = binauralizer.binauralize(ambi.data)
    save_wav(output_fn, stereo, rate)
Example #12
def test_ambisonics_binauralizer():
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.ambisonics.common import AmbiFormat

    sample = 'wav_test/gen_synthetic-S1'
    ambi, rate = load_wav(sample+'-ambix.wav')

    fmt = AmbiFormat(1, rate)
    binauralizer = DirectAmbisonicBinauralizer(fmt, method='pseudoinv')
    # binauralizer = AmbisonicBinauralizer(fmt, method='projection', use_hrtfs=True, cipic_dir='hrtfs/cipic_subj3')

    stereo = binauralizer.binauralize(ambi)

    save_wav('/tmp/output.wav', stereo / np.abs(stereo).max(), rate)
    os.system('play /tmp/output.wav')
    os.remove('/tmp/output.wav')

    sample = 'wav_test/gen_synthetic-M1'
    ambi, rate = load_wav(sample+'-ambix.wav')
    stereo = binauralizer.binauralize(ambi)

    save_wav('/tmp/output.wav', stereo / np.abs(stereo).max(), rate)
    os.system('play /tmp/output.wav')
    os.remove('/tmp/output.wav')
Example #13
def run(input_fn, position_fn, output_fn, use_hrtfs, hrtf_dir):
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    positions = [[float(num) for num in l.strip().split()] for l in open(position_fn, 'r')]
    positions = [Position(p[0], p[1], p[2], 'polar') for p in positions]
    binauralizer = SourceBinauralizer(use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)

    if len(positions) == 1:     # Stationary source
        source = PositionalSource(mono, positions[0], rate)
        stereo = binauralizer.binauralize(source)

    else:
        source = MovingSource(mono, positions, rate)
        stereo = np.zeros((mono.shape[0], 2))
        while source.tic():
            binauralizer.binauralize_frame(source, stereo, source.cur_idx)

    save_wav(output_fn, stereo, rate)
Example #14
    def get(self, start_time, size, rotation=None):
        # Check if padding is necessary
        start_frame = int(start_time * self.rate)
        pad_before, pad_after = 0, 0
        if start_frame < 0:
            pad_before = abs(start_frame)
            size -= pad_before
            start_time, start_frame = 0., 0
        if start_frame + size > self.num_frames:
            pad_after = start_frame + size - self.num_frames
            size -= pad_after
            
        # Load audio
        index = range(int(start_time), min(int(np.ceil(start_time + size / float(self.rate))), self.num_files))
        fns = ['{}/{:06d}.wav'.format(self.audio_folder, i) for i in index]
        chunk = [load_wav(fn, self.rate)[0] for fn in fns]
        chunk = np.concatenate(chunk, axis=0) if len(chunk) > 1 else chunk[0]
        ss = int((start_time - int(start_time)) * self.rate)
        chunk = chunk[ss:ss + size, :self.num_channels]

        # Pad
        if pad_before > 0:
            pad = np.zeros((pad_before, self.num_channels))
            chunk = np.concatenate((pad, chunk), axis=0)
        if pad_after > 0:
            pad = np.zeros((pad_after, self.num_channels))
            chunk = np.concatenate((chunk, pad), axis=0)

        # Apply rotation
        if rotation is not None:
            assert -np.pi <= rotation < np.pi
            c = np.cos(rotation)
            s = np.sin(rotation)
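            # Yaw rotation of the first-order sound field, assuming ACN channel order (W, Y, Z, X).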
            rot_mtx = np.array([[1, 0, 0, 0],  # W' = W
                                [0, c, 0, s],  # Y' = X sin + Y cos
                                [0, 0, 1, 0],  # Z' = Z
                                [0, -s, 0, c]]) # X' = X cos - Y sin
            chunk = np.dot(chunk, rot_mtx.T)

        return chunk
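A hedged usage sketch of get(): the reader instance and its rate below are illustrative assumptions; only the padding and rotation behaviour follow from the method itself.

import numpy as np

# `reader` is assumed to be an instance of the class defining get() above,
# with rate == 24000 and 4 ambisonic channels.
chunk = reader.get(start_time=-0.5, size=24000, rotation=np.pi / 2)

# The first 12000 frames of `chunk` are zero padding (the request starts 0.5 s
# before the recording), and the +90 degree yaw moves a source that was in
# front (+X) so that it now arrives from the left (+Y).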
Example #15
def test_emd():
    from pyutils.iolib.audio import load_wav

    # Load ambisonics
    ang_res = 10
    sample = 'wav_test/gen_synthetic-M1'
    data, rate = load_wav(sample + '-ambix.wav')
    duration = data.shape[0] / float(rate)

    ambiVis = SphericalAmbisonicsVisualizer(data,
                                            rate,
                                            window=0.1,
                                            angular_res=ang_res)
    # vid_reader = VideoReader(sample+'.avi', ambiVis.visualization_rate(),
    #                          image_preprocessing=lambda x: resize(rgb2gray(x), ambiVis.phi_mesh.shape))
    srcVis = SphericalSourceVisualizer(sample + '-position.txt',
                                       duration,
                                       rate=ambiVis.visualization_rate(),
                                       angular_res=ang_res)

    for rms, frame in zip(ambiVis.loop_frames(), srcVis.loop_frames()):
        print(emd(rms, frame, ambiVis.phi_mesh, ambiVis.nu_mesh))
Example #16
def gen_360video(audio_fn,
                 video_fn,
                 output_fn,
                 inject_meta=False,
                 overlay_map=False,
                 binauralize=False,
                 no_spatial_audio=False):
    from pyutils.iolib.video import VideoReader, VideoWriter
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.ambisonics.distance import SphericalAmbisonicsVisualizer
    import tempfile
    from matplotlib import pyplot as plt
    from skimage.transform import resize

    tmp_file = tempfile.mktemp(dir='/tmp/', suffix='.mp4')
    tmp_snd_file = tempfile.mktemp(dir='/tmp/', suffix='.wav')
    tmp_vid_file = tempfile.mktemp(dir='/tmp/', suffix='.mp4')

    print('Splitting')
    cmd = 'ffmpeg -i {} -vn -strict -2 {}'.format(audio_fn, tmp_snd_file)
    print(cmd)
    os.system(cmd)

    cmd = 'ffmpeg -i {} -an -vcodec copy {}'.format(video_fn, tmp_vid_file)
    print(cmd)
    os.system(cmd)

    if overlay_map:
        print('Overlaying spherical map')
        tmp_vid_file2 = tempfile.mktemp(dir='/tmp/', suffix='.mp4')
        ambix, snd_rate = load_wav(tmp_snd_file)
        reader = VideoReader(tmp_vid_file, rate=10)
        writer = VideoWriter(tmp_vid_file2, reader.fps)
        ambiVis = SphericalAmbisonicsVisualizer(ambix[::5], snd_rate / 5.,
                                                5. / reader.fps, 5.)
        cmap = plt.cm.YlOrRd(np.linspace(0, 1, 256))[:, :3]

        cur_rms = ambiVis.get_next_frame()
        cur_rms = (cur_rms - cur_rms.min()) / (cur_rms.max() - cur_rms.min() +
                                               0.005)
        while True:
            prev_rms = cur_rms
            cur_rms = ambiVis.get_next_frame()
            if cur_rms is None:
                break
            cur_rms = (cur_rms - cur_rms.min()) / (cur_rms.max() -
                                                   cur_rms.min() + 0.005)

            for i in range(5):
                frame = reader.get()
                if frame is None:
                    break

                beta = i / 5.
                rms = (1 - beta) * prev_rms + beta * cur_rms
                rms = rms * 2. - 0.7
                rms[rms < 0] = 0
                dir_map = (rms * 255).astype(int)
                dir_map[dir_map > 255] = 255
                dir_map = resize(cmap[dir_map], reader.frame_shape[:2]) * 255

                alpha = resize(rms[:, :, np.newaxis],
                               reader.frame_shape[:2]) * 0.6
                overlay = alpha * dir_map + (1 - alpha) * frame
                writer.write_frame(overlay.astype(np.uint8))

        del writer, reader
        os.remove(tmp_vid_file)
        tmp_vid_file = tmp_vid_file2

    if binauralize:
        print('Binauralizing')
        tmp_snd_file2 = tempfile.mktemp(dir='/tmp/', suffix='.wav')
        ambix, snd_rate = load_wav(tmp_snd_file)
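        # Crude stereo decode: take W+Y / W-Y as left / right, assuming ambiX ACN order (W, Y, Z, X).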
        stereo = np.stack(
            [ambix[:, 0] + ambix[:, 1], ambix[:, 0] - ambix[:, 1]], 1)
        stereo /= (np.abs(stereo).max() / 0.95)
        save_wav(tmp_snd_file2, stereo, snd_rate)

        os.remove(tmp_snd_file)
        tmp_snd_file = tmp_snd_file2

    print('Mixing')
    cmd = 'ffmpeg -y -i {} -i {} -vcodec copy -strict -2 {}'.format(
        tmp_snd_file, tmp_vid_file, tmp_file)
    print(cmd)
    os.system(cmd)

    cwd = os.getcwd()
    output_fn = os.path.join(cwd, output_fn)

    if inject_meta:
        print('Injecting metadata')
        file_dir = os.path.dirname(os.path.realpath(__file__))
        spt_media_dir = os.path.realpath(
            os.path.join(file_dir, '3rd-party', 'spatial-media'))
        os.chdir(spt_media_dir)
        os.system('python spatialmedia -i --stereo=none {} {} {} '.format(
            '' if no_spatial_audio else '--spatial-audio', tmp_file,
            output_fn))
        os.chdir(cwd)
        os.remove(tmp_file)

    else:
        import shutil
        shutil.move(tmp_file, output_fn)

    os.remove(tmp_snd_file)
    os.remove(tmp_vid_file)