Example #1
 def _transform_indices(self, indices):
     for index in indices:
         sound_id, time_slice = self.time_slices[index]
         start = time_slice.start / zounds.Seconds(1)
         duration = time_slice.duration / zounds.Seconds(1)
         data = {
             'created_by': self.user_uri,
             'sound': sound_id,
             'start_seconds': start,
             'duration_seconds': duration,
             'end_seconds': start + duration
         }
         yield data
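
A pattern that recurs throughout these examples is dividing a time quantity by zounds.Seconds(1) to turn it into a plain float number of seconds. The behavior matches numpy timedelta64 arithmetic, so a minimal sketch of the conversion (assuming zounds.Seconds(n) is equivalent to numpy.timedelta64(n, 's'), as the arithmetic in these examples suggests) looks like this:

import numpy as np

# Stand-ins for time_slice.start and time_slice.duration above.
start = np.timedelta64(1500, 'ms')
duration = np.timedelta64(2500, 'ms')

one_second = np.timedelta64(1, 's')
start_seconds = start / one_second        # timedelta / timedelta -> 1.5
duration_seconds = duration / one_second  # 2.5
end_seconds = start_seconds + duration_seconds
print(start_seconds, duration_seconds, end_seconds)  # 1.5 2.5 4.0
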
Example #2
 def get_metadata(self):
     return {
         'type': str(self.dtype),
         'shape': ('variable', CHROMA_SCALE.n_bands),
         'dimensions': [{
             'type': 'time',
             'sample_frequency_seconds':
                 windowing_sample_rate.frequency / zounds.Seconds(1),
             'sample_duration_seconds':
                 windowing_sample_rate.duration / zounds.Seconds(1)
         }, {
             'type': 'identity'
         }]
     }
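
The sample_frequency_seconds and sample_duration_seconds fields come from the same divide-by-one-second idiom, applied to a windowing sample rate. A hedged sketch, building a SampleRate the way Examples #3 and #13 do (the 250 ms / 500 ms values here are hypothetical stand-ins for the module-level windowing_sample_rate):

import zounds

windowing_sample_rate = zounds.SampleRate(
    frequency=zounds.Milliseconds(250),
    duration=zounds.Milliseconds(500))

print(windowing_sample_rate.frequency / zounds.Seconds(1))  # 0.25
print(windowing_sample_rate.duration / zounds.Seconds(1))   # 0.5
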
Example #3
class Document(BaseModel):
    bark = zounds.ArrayWithUnitsFeature(zounds.BarkBands,
                                        samplerate=samplerate,
                                        stop_freq_hz=samplerate.nyquist,
                                        needs=BaseModel.fft,
                                        store=True)

    long_windowed = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        wscheme=zounds.SampleRate(frequency=zounds.Milliseconds(500),
                                  duration=zounds.Seconds(1)),
        wfunc=windowing_func,
        needs=BaseModel.resampled,
        store=True)

    dct = zounds.ArrayWithUnitsFeature(zounds.DCT,
                                       scale_always_even=True,
                                       needs=long_windowed,
                                       store=True)

    mdct = zounds.FrequencyAdaptiveFeature(zounds.FrequencyAdaptiveTransform,
                                           transform=scipy.fftpack.idct,
                                           scale=scale,
                                           needs=dct,
                                           store=True)
Example #4
 def total_duration(doc, ts):
     try:
         duration = duration_cache[doc._id]
     except KeyError:
         duration = doc.geom.dimensions[0].end / zounds.Seconds(1)
         duration_cache[doc._id] = duration
     return duration
Example #5
def synthetic():
    for i in xrange(100):
        duration = zounds.Seconds(np.random.randint(2, 20))
        root = np.random.randint(50, 400)
        hz = [root]
        for _ in xrange(0):
            hz.append(hz[-1] * 2)
        synth = zounds.SineSynthesizer(samplerate)
        s = synth.synthesize(duration, hz)
        yield s.encode()
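
A hedged sketch of what one synthetic item should look like. The generator above relies on a module-level samplerate; here zounds.SR44100() (used in other examples on this page) stands in for it, so three seconds of a 220 Hz sine should contain 3 * 44100 = 132300 samples before encoding:

import zounds

sr = zounds.SR44100()
synth = zounds.SineSynthesizer(sr)
samples = synth.synthesize(zounds.Seconds(3), [220.])
print(len(samples))  # expected: 132300
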
Example #6
def transform_search_result(result, req, nresults):
    _id, ts, extra_data = result

    ts = WebTimeSlice(ts)
    quoted_id = urllib.quote(_id, safe='')
    qs = ts.to_query_string()

    start = ts.start / zounds.Seconds(1)
    duration = ts.duration / zounds.Seconds(1)
    end = start + duration

    return dict(_id=_id,
                start=start,
                duration=duration,
                end=end,
                search=str(
                    SearchUri(quoted_id,
                              req=req,
                              timeslice=ts,
                              nresults=nresults)),
                bark=str(
                    FeatureUri(quoted_id=quoted_id,
                               feature='geom',
                               timeslice_query_string=qs,
                               req=req)),
                hashed=str(
                    FeatureUri(quoted_id=quoted_id,
                               feature='hashed',
                               timeslice_query_string=qs,
                               req=req)),
                audio=str(
                    FeatureUri(quoted_id=quoted_id,
                               timeslice_query_string=qs,
                               feature='ogg',
                               req=req)),
                meta=str(
                    FeatureUri(quoted_id=quoted_id,
                               feature='meta',
                               timeslice_query_string=qs,
                               req=req)),
                **extra_data)
Example #7
def main(user_name,
         bucket_name,
         email,
         about_me,
         info_url,
         listener_cls,
         page_size=100,
         logger=None):
    parser = argparse.ArgumentParser(parents=[DefaultArgumentParser()])
    args = parser.parse_args()
    client = Client(args.annotate_api_endpoint, logger=logger)

    object_storage_client = ObjectStorageClient(
        endpoint=args.s3_endpoint,
        region=args.s3_region,
        access_key=args.aws_access_key_id,
        secret=args.aws_secret_access_key,
        bucket=bucket_name)

    listener = listener_cls(client,
                            object_storage_client,
                            page_size,
                            logger=logger)

    # get metadata describing feature shape and dimensions
    try:
        metadata = listener.get_metadata()
    except AttributeError:
        # the listener does not provide metadata explicitly, but in the case
        # of listeners that accept audio samples directly, we can infer the
        # metadata
        samples = zounds.AudioSamples.silence(zounds.SR44100(),
                                              zounds.Seconds(10))
        binary_data = listener._process_samples(samples)
        metadata = about_me_metadata(binary_data)

    try:
        with open(about_me, 'r') as f:
            about_me = f.read()
    except IOError:
        pass

    about_me = about_me.format(metadata=json.dumps(metadata, indent=4))

    object_storage_client.ensure_bucket_exists()

    # TODO: Some kind of structured information about transformation pipeline
    # in about me and/or info url
    client.upsert_featurebot(user_name, email, args.password, about_me,
                             info_url)

    with listener.run():
        pass
Example #8
def about_me_metadata(binary_data):
    dims = binary_data.arr.dimensions
    shape = list(binary_data.arr.shape)

    # The first time dimension should be displayed as variable since it depends
    # on the length of the audio input
    if isinstance(dims[0], zounds.TimeDimension):
        shape[0] = 'variable'

    metadata_dims = []
    for dim in dims:
        if isinstance(dim, zounds.TimeDimension):
            metadata_dims.append({
                'type': 'time',
                'sample_frequency_seconds': dim.frequency / zounds.Seconds(1),
                'sample_duration_seconds': dim.duration / zounds.Seconds(1)
            })
        elif isinstance(dim, zounds.FrequencyDimension):
            scale = dim.scale
            metadata_dims.append({
                'type': 'frequency',
                'start_hz': scale.start_hz,
                'stop_hz': scale.stop_hz,
                'n_bands': scale.n_bands,
                'scale_type': scale.__class__.__name__
            })
        else:
            metadata_dims.append({'type': 'identity'})

    return {
        'type': str(binary_data.arr.dtype),
        'shape': shape,
        'dimensions': metadata_dims
    }
Example #9
    def time_generator():
        inp = torch.FloatTensor(
            args.time_generator_batch_size, feature_channels, feature_size)\
            .normal_(0, 1)

        total_audio_time = \
            (sr.frequency * total_samples * inp.shape[0]) / zounds.Seconds(1)

        start = time.time()
        bands = generator(inp)
        samples = frequency_recomposition(
            [b.data.cpu().numpy().squeeze() for b in bands.values()], total_samples)
        stop = time.time()
        wall_time = stop - start
        print(f'CPU Generated {total_audio_time} seconds of audio in {wall_time} seconds')
        return samples
Example #10
def load_and_play():
    files = sorted(
        glob.glob('*.npy'),
        key=lambda x: os.stat(x).st_ctime)
    most_recent = files[-1]
    print('loading generated examples from', most_recent)
    results = np.load(most_recent)

    # synthesized = FrequencyDecomposition.synthesize_block(results)
    synthesized = results

    for raw, result in zip(results, synthesized):
        windowed = zounds.sliding_window(result, 512, 256)
        spec = np.abs(np.fft.rfft(windowed))
        audio_samples = zounds.AudioSamples(result, samplerate) \
            .pad_with_silence(zounds.Seconds(1))
        yield raw, result, audio_samples / audio_samples.max(), spec
Example #11
 def feature_hop_hz(self):
     return \
         zounds.Seconds(1) / (self.samplerate.frequency * self.feature_hop)
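
This inverts a duration to get a rate: one second divided by (per-sample period * hop size) yields feature frames per second. A small numpy sketch of the same arithmetic, using a hypothetical 44.1 kHz sample period and a hop of 512 samples:

import numpy as np

sample_period = np.timedelta64(int(1e9 / 44100), 'ns')  # ~22676 ns per sample
feature_hop = 512

hop_period = sample_period * feature_hop
feature_hop_hz = np.timedelta64(1, 's') / hop_period
print(feature_hop_hz)  # ~86.1 feature frames per second
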
Example #12
 def _get_code(self, sound, seconds):
     # hop length of one analysis window, expressed as a float in seconds
     frequency = self.frequency / zounds.Seconds(1)
     # index of the first window belonging to this sound, plus the number of
     # whole windows that have elapsed at the requested offset in seconds
     offset = self.sound_offsets[sound]
     window_index = offset + int(seconds / frequency)
     return self.index[window_index]
Example #13
    args = parser.parse_args()

    _id = Sound.process(meta=args.sound_uri)
    snd = Sound(_id)

    original = snd.resampled
    slow = zounds.AudioSamples(time_stretch(original, 0.75).squeeze(), sr)
    fast = zounds.AudioSamples(time_stretch(original, 1.25).squeeze(), sr)

    higher = zounds.AudioSamples(pitch_shift(original, 1.0).squeeze(), sr)
    lower = zounds.AudioSamples(pitch_shift(original, -1.0).squeeze(), sr)

    # apply a sliding window to demonstrate time stretch and pitch shift in
    # batch mode
    windowing_sr = zounds.SampleRate(
        frequency=zounds.Seconds(5),
        duration=zounds.Seconds(10))

    windowed = snd.resampled.sliding_window(windowing_sr)
    windowed = zounds.ArrayWithUnits(
        windowed, [zounds.IdentityDimension(), windowed.dimensions[1]])

    def samples(x):
        return zounds.AudioSamples(x, sr)

    batch_slow = list(map(samples, time_stretch(windowed, 0.75)))
    batch_fast = list(map(samples, time_stretch(windowed, 1.25)))

    batch_higher = list(map(samples, pitch_shift(windowed, 1.0)))
    batch_lower = list(map(samples, pitch_shift(windowed, -1.0)))
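
The windowing scheme above starts a new 10-second window every 5 seconds, so consecutive windows overlap by half. A back-of-envelope sketch of what those parameters imply (the 60-second input length is hypothetical):

hop_seconds = 5       # windowing_sr frequency
window_seconds = 10   # windowing_sr duration
audio_seconds = 60    # hypothetical input length

overlap_fraction = (window_seconds - hop_seconds) / window_seconds
window_starts = audio_seconds // hop_seconds
print(overlap_fraction)  # 0.5
print(window_starts)     # 12
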
Example #14
 def feature_window_len(self):
     return (self.samplerate.frequency * self.feature_window) / zounds.Seconds(1)
Example #15
 def training_sample_len(self):
     return (self.samplerate.frequency * self.training_sample_win) / zounds.Seconds(1)
Example #16
            loss,
            lambda model: Adam(model.parameters(), lr=0.0001),
            epochs=args.epochs,
            batch_size=batch_size,
            holdout_percent=0.25,
            data_preprocessor=data_preprocessor,
            label_preprocessor=label_preprocessor)

        gen = (snd.windowed for snd in Sound
               if args.internet_archive_id in snd._id)
        pipeline_cls.process(samples=gen, trainer=trainer)

    # instantiate the trained pipeline
    pipeline = pipeline_cls()

    snds = [snd for snd in Sound if args.internet_archive_id in snd._id]
    snd = choice(snds)
    time_slice = zounds.TimeSlice(duration=zounds.Seconds(10))
    encoded = pipeline.pipeline.transform(
        data_preprocessor(snd.windowed[time_slice]))
    recon = encoded.inverse_transform()
    samples = synthesize(recon)

    # start up an in-browser REPL to interact with the results
    app = zounds.ZoundsApp(model=Sound,
                           audio_feature=Sound.ogg,
                           visualization_feature=Sound.windowed,
                           globals=globals(),
                           locals=locals())
    app.start(8888)
Example #17
        code = urllib.unquote(code)
        return Code(base64.urlsafe_b64decode(code))

    @classmethod
    def from_expanded_array(cls, arr):
        packed = arr.packbits(axis=1)
        for p in packed:
            yield Code(p)

    @classmethod
    def from_packed_array(cls, arr):
        for x in arr:
            yield Code(x)


ONE_SECOND = zounds.Seconds(1)


class WebTimeSlice(zounds.TimeSlice):
    def __init__(self, request_or_ts):
        if isinstance(request_or_ts, zounds.TimeSlice):
            ts = request_or_ts
            start = ts.start
            duration = ts.duration
        else:
            request = request_or_ts
            try:
                start = float(request.params['start'])
                start = zounds.Picoseconds(int(start * 1e12))
            except (KeyError, ValueError):
                start = zounds.Picoseconds(0)
Example #18
 def synthesize_iter(self):
     fa = self.as_frequency_adaptive()
     samples = self.__class__.synthesize_block(fa)
     for sample in samples:
         yield sample, zounds.AudioSamples(sample, samplerate) \
             .pad_with_silence(zounds.Seconds(1))
Example #19
 def listen(self):
     return zounds.AudioSamples(self.to_audio()[0], self.samplerate)\
         .pad_with_silence(zounds.Seconds(1))
Example #20
 def judgement_hz(self):
     return [zounds.Seconds(1) / (self.samplerate.frequency * dsr) for dsr in self.downsampling_ratios]
Example #21
        needs=Resampled.resampled,
        wscheme=zounds.HalfLapped(),
        wfunc=zounds.OggVorbisWindowingFunc(),
        store=True)

    mdct = zounds.ArrayWithUnitsFeature(zounds.MDCT, needs=windowed)

    weighted = zounds.ArrayWithUnitsFeature(lambda x: x * zounds.AWeighting(),
                                            needs=mdct)


if __name__ == '__main__':

    # produce some audio to test our pipeline, and encode it as FLAC
    synth = zounds.SineSynthesizer(zounds.SR44100())
    samples = synth.synthesize(zounds.Seconds(5), [220., 440., 880.])
    encoded = samples.encode(fmt='FLAC')

    # process the audio, and fetch features from our in-memory store
    _id = Sound.process(meta=encoded)
    sound = Sound(_id)

    # grab all the frequency information, for a subset of the duration
    start = zounds.Milliseconds(500)
    end = start + zounds.Seconds(2)
    snippet = sound.weighted[start:end, :]

    # grab a subset of frequency information for the duration of the sound
    freq_band = slice(zounds.Hertz(400), zounds.Hertz(500))
    a440 = sound.mdct[:, freq_band]
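
The slice bounds above mix unit types freely; converting them back to plain floats uses the same divide-by-one-second idiom seen throughout these examples. A hedged sketch:

import zounds

start = zounds.Milliseconds(500)
end = start + zounds.Seconds(2)
print(start / zounds.Seconds(1))  # 0.5
print(end / zounds.Seconds(1))    # 2.5
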
Example #22
 def total_duration(doc, ts):
     return doc.fake_hash.dimensions[0].end / zounds.Seconds(1)
Example #23
    )
    args = parser.parse_args()

    _id = Sound.process(meta=args.sound_uri)
    snd = Sound(_id)

    original = snd.resampled
    slow = zounds.AudioSamples(time_stretch(original, 0.75).squeeze(), sr)
    fast = zounds.AudioSamples(time_stretch(original, 1.25).squeeze(), sr)

    higher = zounds.AudioSamples(pitch_shift(original, 1.0).squeeze(), sr)
    lower = zounds.AudioSamples(pitch_shift(original, -1.0).squeeze(), sr)

    # apply a sliding window to demonstrate time stretch and pitch shift in
    # batch mode
    windowing_sr = zounds.SampleRate(frequency=zounds.Seconds(5),
                                     duration=zounds.Seconds(10))

    windowed = snd.resampled.sliding_window(windowing_sr)
    windowed = zounds.ArrayWithUnits(
        windowed, [zounds.IdentityDimension(), windowed.dimensions[1]])

    def samples(x):
        return zounds.AudioSamples(x, sr)

    batch_slow = map(samples, time_stretch(windowed, 0.75))
    batch_fast = map(samples, time_stretch(windowed, 1.25))

    batch_higher = map(samples, pitch_shift(windowed, 1.0))
    batch_lower = map(samples, pitch_shift(windowed, -1.0))
Example #24
    app = zounds.ZoundsApp(globals=globals(), locals=locals())
    app.start_in_thread(8888)

    feature_size = 64

    g = DDSPGenerator(feature_size, feature_channels, 128, None, None, None,
                      None) \
        .to(device) \
        .initialize_weights()
    g_optim = Adam(g.parameters(), lr=0.001, betas=(0, 0.9))

    base_path = '/hdd/musicnet/train_data'
    files = os.listdir(base_path)
    file = choice(files)
    samples = zounds.AudioSamples.from_file(os.path.join(
        base_path, file))[:zounds.Seconds(10)]
    samples = zounds.soundfile.resample(samples, zounds.SR11025())

    start = np.random.randint(0, len(samples) - 16384)
    chunk = samples[start:start + 16384]
    chunk /= (chunk.max() + 1e-12)
    # chunk = spec_test[:16384].astype(np.float32)
    orig = chunk.pad_with_silence()

    target = torch.from_numpy(chunk).to(device).view(1, -1)

    current = None
    inp = compute_features(chunk)
    inp = torch.from_numpy(inp).to(device)
    cond = inp.data.cpu().numpy().squeeze().T
Example #25
path = '/hdd/musicnet/train_data'
pattern = '*.wav'
total_samples = 2**17

samplerate = zounds.SR22050()
feature_spec = {'audio': (total_samples, 1)}

feature_funcs = {'audio': (audio, (samplerate, ))}

batch_size = 1
bs = batch_stream(path, pattern, batch_size, feature_spec, 'audio',
                  feature_funcs)

if __name__ == '__main__':
    # app = zounds.ZoundsApp(locals=locals(), globals=globals())
    # app.start_in_thread(9999)
    # samples, = next(bs)
    # samples = torch.from_numpy(samples)
    # min_size = 2 ** (np.log2(total_samples) - 4)
    # bands = fft_frequency_decompose(samples, min_size)
    # samples = zounds.AudioSamples(samples.squeeze(), samplerate)
    # input('Waiting...')

    n_bands = 5
    sr = samplerate
    for i in range(n_bands):
        start_hz = 0 if i == (n_bands - 1) else sr.nyquist / 2
        stop_hz = sr.nyquist
        n_samples = int(zounds.Seconds(1) / sr.frequency)
        print(n_samples, start_hz, stop_hz)
        sr *= 2
Example #26
    dct = zounds.ArrayWithUnitsFeature(zounds.DCT,
                                       scale_always_even=True,
                                       needs=long_windowed,
                                       store=True)

    mdct = zounds.FrequencyAdaptiveFeature(zounds.FrequencyAdaptiveTransform,
                                           transform=scipy.fftpack.idct,
                                           scale=scale,
                                           needs=dct,
                                           store=True)


if __name__ == '__main__':
    # generate some audio
    synth = zounds.TickSynthesizer(zounds.SR22050())
    orig_audio = synth.synthesize(zounds.Seconds(5), zounds.Milliseconds(200))

    # analyze the audio
    _id = Document.process(meta=orig_audio.encode())
    doc = Document(_id)

    synth = zounds.FrequencyAdaptiveDCTSynthesizer(scale, samplerate)
    recon_audio = synth.synthesize(doc.mdct)

    # get a rasterized visualization of the representation
    img = doc.mdct.square(100, do_overlap_add=True)

    app = zounds.ZoundsApp(model=Document,
                           audio_feature=Document.ogg,
                           visualization_feature=Document.bark,
                           globals=globals(),