Ejemplo n.º 1
0
    def _process_annotation(self, annotation):
        """Turn one FFT annotation into a chroma feature annotation.

        Downloads the FFT data referenced by ``annotation['data_url']``,
        projects it onto the chroma scale, uploads the packed result to S3,
        and registers a new annotation covering the same time span.
        """
        # download and unpack the FFT feature this annotation points at
        response = requests.get(annotation['data_url'])
        fft_feature = BinaryData.unpack(response.content)

        # project the FFT frames onto the chroma scale
        raw_chroma = CHROMA_SCALE.apply(
            fft_feature, zounds.HanningWindowingFunc())
        chroma = zounds.ArrayWithUnits(
            raw_chroma,
            [fft_feature.dimensions[0], zounds.IdentityDimension()]
        ).astype(self.dtype)

        # serialize the chroma feature and push it to S3
        packed = BinaryData(chroma)
        sound_id = self._sound_id_from_uri(annotation['sound'])
        data_url = self.s3_client.put_object(
            sound_id,
            packed.packed_file_like_object(),
            'application/octet-stream')

        logger.info(f'pushed binary data to {data_url}')

        # register the derived annotation against the same time span
        self.client.create_annotations(
            sound_id,
            {
                'start_seconds': annotation['start_seconds'],
                'duration_seconds': annotation['duration_seconds'],
                'data_url': data_url
            })
        logger.info('created annotation')
Ejemplo n.º 2
0
def produce_fake_hash(x):
    """
    Produce random binary features packed into uint64 words.

    The result is totally irrespective of the content of x: each frame of
    x gets 1024 independent fair coin flips, bit-packed into uint64 words,
    wrapped in the same time dimension as x.
    """
    frame_count = x.shape[0]
    random_bits = np.random.binomial(n=1, p=0.5, size=(frame_count, 1024))
    packed_words = np.packbits(random_bits, axis=-1).view(np.uint64)
    dims = [x.dimensions[0], zounds.IdentityDimension()]
    return zounds.ArrayWithUnits(packed_words, dims)
Ejemplo n.º 3
0
def compute_embedding(samples, network):
    """Run audio samples through ``network`` and return the embeddings.

    The audio is resampled to 11.025 kHz, split into non-overlapping
    8192-sample windows, and each window is pushed through the network in
    chunks of 8.
    """
    # TODO: resampling can fail for some odd sampling rates
    resampled = zounds.soundfile.resample(samples, zounds.SR11025())

    # non-overlapping windows: duration equals the hop frequency
    window_freq = resampled.frequency * 8192
    window_rate = zounds.SampleRate(
        frequency=window_freq, duration=window_freq)
    windowed = resampled.sliding_window(window_rate)
    time_dimension = windowed.dimensions[0]

    raw_output = zounds.learn.apply_network(network, windowed, chunksize=8)
    logger.info(raw_output.shape)
    return zounds.ArrayWithUnits(
        raw_output, [time_dimension, zounds.IdentityDimension()])
Ejemplo n.º 4
0
    def _process_annotation(self, annotation):
        """Turn one FFT annotation into an MFCC feature annotation.

        Downloads the FFT data referenced by ``annotation['data_url']``,
        derives 13 MFCC coefficients per frame, uploads the packed result
        to S3, and registers a new annotation covering the same time span.
        """
        # download and unpack the FFT feature this annotation points at
        response = requests.get(annotation['data_url'])
        fft_feature = BinaryData.unpack(response.content)

        # project the FFT frames onto the mel scale
        mel = scale.apply(fft_feature, zounds.HanningWindowingFunc())
        mel = zounds.ArrayWithUnits(
            mel,
            [fft_feature.dimensions[0], zounds.FrequencyDimension(scale)])

        # log-compress, then keep DCT coefficients 1..13 as the MFCCs
        log_mel = 20 * np.log10(mel + 1)
        mfcc = np.abs(dct(log_mel, axis=1)[:, 1:14])
        mfcc = zounds.ArrayWithUnits(
            mfcc,
            [fft_feature.dimensions[0], zounds.IdentityDimension()]
        ).astype(np.float32)

        # serialize the MFCC feature and push it to S3
        binary_data = BinaryData(mfcc)
        sound_id = self._sound_id_from_uri(annotation['sound'])
        data_url = self.s3_client.put_object(
            sound_id,
            binary_data.packed_file_like_object(),
            'application/octet-stream')
        logger.info(f'pushed binary data to {data_url}')

        # register the derived annotation against the same time span
        self.client.create_annotations(
            sound_id,
            {
                'start_seconds': annotation['start_seconds'],
                'duration_seconds': annotation['duration_seconds'],
                'data_url': data_url
            })
        logger.info('created annotation')
Ejemplo n.º 5
0
    original = snd.resampled
    slow = zounds.AudioSamples(time_stretch(original, 0.75).squeeze(), sr)
    fast = zounds.AudioSamples(time_stretch(original, 1.25).squeeze(), sr)

    higher = zounds.AudioSamples(pitch_shift(original, 1.0).squeeze(), sr)
    lower = zounds.AudioSamples(pitch_shift(original, -1.0).squeeze(), sr)

    # apply a sliding window to demonstrate time stretch and pitch shift in
    # batch mode
    windowing_sr = zounds.SampleRate(frequency=zounds.Seconds(5),
                                     duration=zounds.Seconds(10))

    windowed = snd.resampled.sliding_window(windowing_sr)
    windowed = zounds.ArrayWithUnits(
        windowed, [zounds.IdentityDimension(), windowed.dimensions[1]])

    def samples(x):
        return zounds.AudioSamples(x, sr)

    batch_slow = map(samples, time_stretch(windowed, 0.75))
    batch_fast = map(samples, time_stretch(windowed, 1.25))

    batch_higher = map(samples, pitch_shift(windowed, 1.0))
    batch_lower = map(samples, pitch_shift(windowed, -1.0))

    app = zounds.ZoundsApp(model=Sound,
                           visualization_feature=Sound.fft,
                           audio_feature=Sound.resampled,
                           globals=globals(),
                           locals=locals(),