Ejemplo n.º 1
0
def create_uniform_factored_stream(stash,
                                   win_length,
                                   partition_labels=None,
                                   working_size=50,
                                   vocab_dim=157,
                                   pitch_shift=True):
    """Build a chord-sample stream that draws evenly across chord qualities.

    One inner mux is built for each quality index in ``range(13)`` over the
    keys of that quality's sub-index. The inner muxes are wrapped as
    streamers and combined by an outer mux created with
    ``with_replacement=False``.

    Parameters
    ----------
    stash : object
        Entity collection handed to ``chord_sampler`` and, when no labels
        are supplied, to ``util.partition``.
    win_length : int
        Window length forwarded to ``chord_sampler``.
    partition_labels : dict, optional
        Pre-computed quality partition. When None it is computed as
        ``util.partition(stash, quality_map)``.
    working_size : int
        Value passed as ``k`` to the outer mux.
    vocab_dim : int
        Forwarded to ``FX.map_to_joint_index``.
    pitch_shift : bool
        When truthy the stream is routed through ``FX.pitch_shift_cqt``.

    Returns
    -------
    stream
        The result of ``FX.map_to_joint_index`` applied to the muxed stream.
    """
    labels = partition_labels
    if labels is None:
        labels = util.partition(stash, quality_map)

    quality_streamers = []
    for quality in range(13):
        subindex = util.index_partition_arrays(labels, [quality])
        samplers = []
        for key in subindex.keys():
            samplers.append(
                pescador.Streamer(chord_sampler, key, stash, win_length,
                                  subindex))
        inner = pescador.mux(samplers, n_samples=None, k=25, lam=20)
        quality_streamers.append(pescador.Streamer(inner))

    outer_options = dict(n_samples=None, k=working_size, lam=None,
                         with_replacement=False)
    merged = pescador.mux(quality_streamers, **outer_options)
    if pitch_shift:
        merged = FX.pitch_shift_cqt(merged)

    return FX.map_to_joint_index(merged, vocab_dim)
Ejemplo n.º 2
0
def compute_chord_averages(stash, win_length=20, num_obs=5000):
    """Estimate an average ``cqt`` observation per chord class.

    For each quality index in ``range(13)`` a mux is built over that
    quality's entities. For each of the 12 roots the mux is passed through
    ``FX.rotate_chord_to_root`` and ``num_obs`` samples are averaged.
    A final average is computed from partition index 13 without rotation
    (presumably the "no chord" class -- TODO confirm).

    Parameters
    ----------
    stash : object
        Entity collection handed to ``util.partition`` and ``chord_sampler``.
    win_length : int, default=20
        Window length forwarded to ``chord_sampler``. Earlier revisions
        ignored this argument and always used 20; the default keeps that
        behaviour for callers that do not pass it.
    num_obs : int, default=5000
        Number of samples drawn for each average.

    Returns
    -------
    np.ndarray
        Stack of ``13 * 12 + 1`` averaged observations, ordered by
        ``quality * 12 + root`` with the index-13 average last.
    """
    quality_partition = util.partition(stash, quality_map)

    def _build_pool(index):
        # One sampler per entity key present in the given sub-index.
        return [
            pescador.Streamer(chord_sampler, key, stash, win_length, index)
            for key in index
        ]

    def _average(stream):
        # next() works for both Python 2 and Python 3 iterators.
        x_obs = np.array([next(stream).cqt for _ in range(num_obs)])
        return x_obs.mean(axis=0).squeeze()

    qual_indexes = [
        util.index_partition_arrays(quality_partition, [q]) for q in range(13)
    ]
    qual_pools = [_build_pool(q_idx) for q_idx in qual_indexes]

    obs_aves = []
    for pool in qual_pools:
        base_stream = pescador.mux(pool, n_samples=None, k=50, lam=5)
        for root in range(12):
            stream = FX.rotate_chord_to_root(base_stream, root)
            obs_aves.append(_average(stream))
            # Progress indicator: number of averages computed so far.
            print(len(obs_aves))

    null_index = util.index_partition_arrays(quality_partition, [13])
    null_stream = pescador.mux(_build_pool(null_index),
                               n_samples=None, k=50, lam=5)
    obs_aves.append(_average(null_stream))
    return np.array(obs_aves)
Ejemplo n.º 3
0
def sample_chord_qualities(stash, output_dir, win_length=20, num_obs=10000):
    """Draw ``num_obs`` ``cqt`` samples per chord class and save them to disk.

    For every quality index in ``range(13)`` and every root in ``range(12)``
    the samples are written to ``<output_dir>/<chord_idx>.npy`` (zero-padded
    to three digits), where ``chord_idx = quality * 12 + root``. Samples from
    partition index 13 are written, unrotated, to ``156.npy``.

    Parameters
    ----------
    stash : object
        Entity collection handed to ``util.partition`` and ``chord_sampler``.
    output_dir : str
        Destination directory; created through ``futil.create_directory``.
    win_length : int, default=20
        Window length forwarded to ``chord_sampler``. Earlier revisions
        ignored this argument and always used 20; the default keeps that
        behaviour for callers that do not pass it.
    num_obs : int, default=10000
        Number of samples stored in each file.
    """
    quality_partition = util.partition(stash, quality_map)

    def _build_pool(index):
        # One sampler per entity key present in the given sub-index.
        return [
            pescador.Streamer(chord_sampler, key, stash, win_length, index)
            for key in index
        ]

    def _collect(stream):
        # next() works for both Python 2 and Python 3 iterators.
        return np.array([next(stream).cqt for _ in range(num_obs)])

    qual_indexes = [
        util.index_partition_arrays(quality_partition, [q]) for q in range(13)
    ]
    qual_pools = [_build_pool(q_idx) for q_idx in qual_indexes]

    futil.create_directory(output_dir)
    print("[%s] Starting loop" % time.asctime())
    for qual, pool in enumerate(qual_pools):
        base_stream = pescador.mux(pool, n_samples=None, k=50, lam=5)
        for root in range(12):
            stream = FX.rotate_chord_to_root(base_stream, root)
            chord_idx = qual * 12 + root
            np.save(os.path.join(output_dir, "%03d.npy" % chord_idx),
                    _collect(stream))
            print("[%s] %3d" % (time.asctime(), chord_idx))

    null_index = util.index_partition_arrays(quality_partition, [13])
    null_stream = pescador.mux(_build_pool(null_index),
                               n_samples=None, k=50, lam=5)
    # 156 == 13 * 12: the slot following the last rotated chord index.
    np.save(os.path.join(output_dir, "156.npy"), _collect(null_stream))
Ejemplo n.º 4
0
def muxed_uniform_chord_stream(stash, synth_stash, win_length, vocab_dim=157,
                               pitch_shift=0, working_size=4):
    """Build a per-chord uniform sample stream drawing from two stashes.

    Every chord index in ``range(vocab_dim)`` gets samplers from ``stash``.
    Indexes in ``range(60, vocab_dim - 1)`` additionally get samplers from
    ``synth_stash``. Chord indexes with no samplers at all are skipped.

    Parameters
    ----------
    stash : object
        Primary entity collection.
    synth_stash : object
        Secondary entity collection, used only for chord indexes 60 up to
        (but excluding) ``vocab_dim - 1``.
    win_length : int
        Window length forwarded to ``chord_sampler``.
    vocab_dim : int
        Number of chord indexes considered; also ``k`` of the outer mux and
        forwarded to ``FX.map_to_chord_index``.
    pitch_shift : int
        When non-zero, passed as ``max_pitch_shift`` to
        ``FX.pitch_shift_cqt``.
    working_size : int
        ``k`` of each per-chord mux.

    Returns
    -------
    stream
        The result of ``FX.map_to_chord_index`` on the merged stream.
    """
    real_labels = util.partition(stash, chord_map)
    synth_labels = util.partition(synth_stash, chord_map)

    def _samplers(source, labels, index):
        # One sampler per entity key that contains the requested chord index.
        subindex = util.index_partition_arrays(labels, [index])
        return [pescador.Streamer(chord_sampler, key, source,
                                  win_length, subindex)
                for key in subindex.keys()]

    chord_pool = []
    for chord_idx in range(vocab_dim):
        samplers = _samplers(stash, real_labels, chord_idx)
        if 60 <= chord_idx < vocab_dim - 1:
            samplers.extend(_samplers(synth_stash, synth_labels, chord_idx))
        if not samplers:
            continue
        inner = pescador.mux(
            samplers, n_samples=None, k=working_size, lam=20)
        chord_pool.append(pescador.Streamer(inner))

    merged = pescador.mux(chord_pool, n_samples=None, k=vocab_dim, lam=None,
                          with_replacement=False)
    if pitch_shift:
        merged = FX.pitch_shift_cqt(merged, max_pitch_shift=pitch_shift)

    return FX.map_to_chord_index(merged, vocab_dim)
Ejemplo n.º 5
0
def learn_codebook(collection, n_codewords, working_size, max_iter, n_samples, buffer_size):
    """Fit a whitening transform and then a codebook on a track collection.

    Two passes are made over a mux of per-track feature streams: the first
    fits a ``Whitening`` learner, the second fits a ``VectorQuantizer`` whose
    feature streams receive the fitted whitener as ``transform``.

    Parameters
    ----------
    collection : object
        Collection identifier passed to ``seymour.get_collection_tracks``.
    n_codewords : int
        ``n_clusters`` of the ``HartiganOnline`` clusterer.
    working_size : int
        Third positional argument of ``pescador.mux``.
    max_iter : int
        Second positional argument of ``pescador.mux``.
    n_samples : number
        Passed to ``pescador.mux`` as ``lam``.
    buffer_size : int
        ``batch_size`` of both ``pescador.StreamLearner`` wrappers.

    Returns
    -------
    transformer, encoder
        The fitted whitening learner and the fitted codebook learner.
    """
    tracks = seymour.get_collection_tracks(collection)

    def _track_mux(**streamer_kwargs):
        # Fresh mux over one feature streamer per track.
        seeds = [pescador.Streamer(feature_stream, track, **streamer_kwargs)
                 for track in tracks]
        return pescador.mux(seeds, max_iter, working_size, lam=n_samples)

    print('Learning from collection [%s], %d tracks' % (collection, len(tracks)))

    # Pass 1: learn the whitening transform from raw features.
    print('Learning the feature scaling... ')
    raw_stream = _track_mux()
    transformer = pescador.StreamLearner(Whitening(), batch_size=buffer_size)
    transformer.iter_fit(raw_stream)

    # Pass 2: learn the codebook from features run through the whitener.
    print('Learning the codebook... ')
    whitened_stream = _track_mux(transform=transformer)
    quantizer = VectorQuantizer(
        clusterer=HartiganOnline(n_clusters=n_codewords))
    encoder = pescador.StreamLearner(quantizer, batch_size=buffer_size)
    encoder.iter_fit(whitened_stream)

    return transformer, encoder
Ejemplo n.º 6
0
def create_contrastive_chord_stream(stash, win_length, valid_idx=None,
                                    partition_labels=None, working_size=2,
                                    vocab_dim=157, pitch_shift=0,
                                    neg_probs=None):
    """Return a stream of chord samples, with equal positive and negative
    examples.

    A mux is built for every chord index in ``valid_idx`` that has data.
    Each such chord is then paired with a "negative" mux over the other
    chords, weighted by the matching row of ``neg_probs``.

    Parameters
    ----------
    stash : object
        Entity collection handed to ``util.partition`` and ``chord_sampler``.
    win_length : int
        Window length forwarded to ``chord_sampler``.
    valid_idx : iterable of int, optional
        Chord indexes to open; defaults to ``range(vocab_dim)``. Indexes not
        listed are treated as having no data.
    partition_labels : dict, optional
        Pre-computed partition; defaults to
        ``util.partition(stash, chord_map)``.
    working_size : int
        ``k`` of each per-chord mux.
    vocab_dim : int
        Size of the chord vocabulary.
    pitch_shift : int
        Accepted for interface compatibility; not used by this function.
    neg_probs : array_like, shape (vocab_dim, vocab_dim), optional
        Row ``i`` weights the negative classes for chord ``i``. Defaults to
        uniform over all other classes. The caller's array is not modified.

    Returns
    -------
    stream
        The result of ``FX.unpack_contrastive_pairs`` on the muxed pairs.

    Raises
    ------
    AssertionError
        If a chord with data has no negative class with both data and
        non-zero probability.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, chord_map)

    if valid_idx is None:
        valid_idx = range(vocab_dim)

    if neg_probs is None:
        neg_probs = np.ones([vocab_dim]*2)
        neg_probs[np.eye(vocab_dim, dtype=bool)] = 0.0
        neg_probs = util.normalize(neg_probs, axis=1)

    # One slot per vocabulary index so that positions always line up with
    # `has_data` and with the columns of `neg_probs`, even when `valid_idx`
    # is a strict subset of the vocabulary.
    chord_streams = np.array([None] * vocab_dim, dtype=object)
    has_data = np.zeros(vocab_dim, dtype=bool)
    for chord_idx in valid_idx:
        subindex = util.index_partition_arrays(partition_labels, [chord_idx])
        entity_pool = [pescador.Streamer(chord_sampler, key, stash,
                                         win_length, subindex)
                       for key in subindex.keys()]
        if not entity_pool:
            continue
        chord_streams[chord_idx] = pescador.mux(
            entity_pool, n_samples=None, k=working_size, lam=20)
        has_data[chord_idx] = True

    binary_pool = []
    for chord_idx in range(vocab_dim):
        if chord_streams[chord_idx] is None:
            continue

        # Work on a copy: indexing a row yields a view, and masking it in
        # place would silently rewrite the caller's `neg_probs`.
        not_chord_probs = np.array(neg_probs[chord_idx], dtype=float)
        # Skip contrast streams with (a) no data or (b) no probability.
        not_chord_probs[chord_idx] = 0.0
        not_chord_probs *= has_data
        nidx = not_chord_probs > 0.0
        assert not_chord_probs.sum() > 0.0, (
            "No usable negative class for chord index %d" % chord_idx)
        chord_pool = [pescador.Streamer(x)
                      for x in chord_streams[nidx]]
        neg_stream = pescador.mux(chord_pool, n_samples=None,
                                  k=len(chord_pool), lam=None,
                                  with_replacement=False,
                                  pool_weights=not_chord_probs[nidx])
        # izip keeps the pairing lazy; both streams are consumed in lockstep.
        pair_stream = itertools.izip(chord_streams[chord_idx], neg_stream)
        binary_pool.append(pescador.Streamer(pair_stream))

    cstream = pescador.mux(binary_pool, n_samples=None, k=len(binary_pool),
                           lam=None, with_replacement=False)
    return FX.unpack_contrastive_pairs(cstream, vocab_dim)
Ejemplo n.º 7
0
def test_restart_mux():
    """Check that activating the same Mux twice yields equally many items."""
    streamers = [pescador.Streamer(chars) for chars in ('abc', 'def')]
    mux = pescador.Mux(streamers, k=2, rate=None, revive=True,
                       with_replacement=False, random_state=1234)

    # Exhaust the first activation completely before starting the second.
    first_pass = list(mux(max_iter=100))
    second_pass = list(mux(max_iter=100))
    assert len(first_pass) == len(second_pass)
Ejemplo n.º 8
0
def create_uniform_chord_index_stream(stash, win_length, lexicon,
                                      index_mapper=map_chord_labels,
                                      sample_func=slice_cqt_entity,
                                      pitch_shift_func=FX.pitch_shift_cqt,
                                      max_pitch_shift=0, working_size=4,
                                      partition_labels=None, valid_idx=None):
    """Build a chord stream that presents each chord class uniformly.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    lexicon : lexicon.Lexicon
        Instantiated chord lexicon for mapping labels to indices.
    index_mapper : callable
        Label-mapping function handed to ``util.partition`` and
        ``FX.map_to_class_index``.
    sample_func : callable
        Forwarded to ``chord_sampler`` as ``sample_func``.
    pitch_shift_func : callable
        Applied to the stream when ``max_pitch_shift > 0``.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation.
    working_size : int
        Number of open streams at a time within each per-chord mux.
    partition_labels : dict, optional
        Pre-computed partition; computed from ``stash`` when None.
    valid_idx : iterable of int, optional
        Chord indexes to include; defaults to
        ``range(lexicon.num_classes)``.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    labels = partition_labels
    if labels is None:
        labels = util.partition(stash, index_mapper, lexicon)

    class_indexes = valid_idx
    if class_indexes is None:
        class_indexes = range(lexicon.num_classes)

    chord_pool = []
    for chord_idx in class_indexes:
        subindex = util.index_partition_arrays(labels, [chord_idx])
        samplers = []
        for key in subindex.keys():
            samplers.append(
                pescador.Streamer(chord_sampler, key, stash, win_length,
                                  subindex, sample_func=sample_func))
        # Chords without any entity contribute nothing to the outer mux.
        if samplers:
            inner = pescador.mux(
                samplers, n_samples=None, k=working_size, lam=20)
            chord_pool.append(pescador.Streamer(inner))

    merged = pescador.mux(chord_pool, n_samples=None, k=lexicon.vocab_dim,
                          lam=None, with_replacement=False)
    if max_pitch_shift > 0:
        merged = pitch_shift_func(merged, max_pitch_shift=max_pitch_shift)

    return FX.map_to_class_index(merged, index_mapper, lexicon)
Ejemplo n.º 9
0
def create_fretboard_stream(stash,
                            win_length,
                            vocab,
                            targets,
                            working_size=50,
                            sample_func=util.slice_cqt_entity):
    """Build an unconstrained sample stream mapped through the fretboard mapper.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    vocab : object
        Forwarded to ``futil.fretboard_mapper``.
    targets : object
        Forwarded to ``futil.fretboard_mapper``.
    working_size : int
        Number of open streams at a time.
    sample_func : callable
        Forwarded to ``cqt_sampler`` as ``sample_func``.

    Returns
    -------
    stream : generator
        Data stream of windowed cqt-fret entities.
    """
    samplers = []
    for key in stash.keys():
        samplers.append(
            pescador.Streamer(cqt_sampler, key, stash, win_length,
                              sample_func=sample_func))

    muxed = pescador.mux(samplers, None, working_size, lam=25)
    return futil.fretboard_mapper(muxed, vocab, targets)
Ejemplo n.º 10
0
    def setup(self):
        """Build the master stream and its buffered wrapper.

        Creates one mux per unique value of ``features_df["instrument"]``,
        drops the ones that came back as None, combines the rest with
        ``master_mux_params`` and stores the result on ``master_stream`` and
        ``buffered_streamer``.
        """
        instrument_names = list(self.features_df["instrument"].unique())

        # Build every per-instrument mux first, then wrap the usable ones.
        per_instrument = [self._instrument_mux(name)
                          for name in instrument_names]
        usable = [candidate for candidate in per_instrument
                  if candidate is not None]
        wrapped = [pescador.Streamer(candidate) for candidate in usable]

        combined = pescador.mux(wrapped, **self.master_mux_params)
        # The mux is wrapped in a Streamer so the buffering layer can
        # consume it.
        self.master_stream = pescador.Streamer(combined)

        # Pick the buffering implementation according to the zmq flag.
        make_buffer = zmq_buffered_stream if self.use_zmq else buffer_stream
        self.buffered_streamer = make_buffer(self.master_stream,
                                             self.batch_size)
Ejemplo n.º 11
0
def chord_streamer(stash, win_length, partition_labels=None,
                   vocab_dim=157, working_size=4, valid_idx=None,
                   n_samples=5000, batch_size=50):
    """Open one minibatch stream per chord index that has data.

    Parameters
    ----------
    stash : object
        Entity collection handed to ``D.chord_sampler``.
    win_length : int
        Window length forwarded to ``D.chord_sampler``.
    partition_labels : dict, optional
        Pre-computed partition; defaults to
        ``util.partition(stash, D.chord_map)``.
    vocab_dim : int
        Default range of chord indexes; forwarded to
        ``FX.map_to_chord_index``.
    working_size : int
        ``k`` of each per-chord mux.
    valid_idx : iterable of int, optional
        Chord indexes to open; defaults to ``range(vocab_dim)``.
    n_samples : int
        ``n_samples`` of each per-chord mux.
    batch_size : int
        ``batch_size`` passed to ``S.minibatch``.

    Returns
    -------
    chord_pool : list
        One ``S.minibatch`` stream per chord index that had entities.
    chord_idx : np.ndarray
        The chord indexes matching ``chord_pool``, in the same order.
    """
    labels = partition_labels
    if labels is None:
        labels = util.partition(stash, D.chord_map)

    candidates = valid_idx
    if candidates is None:
        candidates = range(vocab_dim)

    batches = []
    opened = []
    for idx in candidates:
        print("Opening %d ..." % idx)
        subindex = util.index_partition_arrays(labels, [idx])
        samplers = [pescador.Streamer(D.chord_sampler, key, stash,
                                      win_length, subindex)
                    for key in subindex.keys()]
        if not samplers:
            continue
        muxed = pescador.mux(
            samplers, n_samples=n_samples, k=working_size, lam=20)
        indexed = FX.map_to_chord_index(muxed, vocab_dim)
        batches.append(S.minibatch(indexed, batch_size=batch_size))
        opened.append(idx)
    print("Done!")
    return batches, np.array(opened)
Ejemplo n.º 12
0
def create_labeled_stream(stash, win_length, working_size=5000, threshold=None,
                          sample_func=slice_cqt_entity):
    """Mux one ``cqt_sampler`` stream per stash key into a single stream.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    working_size : int
        Number of open streams at a time.
    threshold : scalar, default=None
        Threshold under which to suppress entities.
    sample_func : callable
        Sampling function to apply to each entity.

    Returns
    -------
    stream : generator
        Data stream of windowed entities.
    """
    samplers = []
    for key in stash.keys():
        samplers.append(
            pescador.Streamer(cqt_sampler, key, stash, win_length,
                              sample_func=sample_func, threshold=threshold))

    return pescador.mux(samplers, None, working_size, lam=25)
Ejemplo n.º 13
0
def create_target_stream(stash, win_length, working_size=50, max_pitch_shift=0,
                         bins_per_pitch=1, sample_func=slice_cqt_entity,
                         mapper=FX.map_to_chroma):
    """Build an unconstrained chord-sample stream and apply a target mapper.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    working_size : int
        Number of open streams at a time.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation; no
        shifting is applied unless this is greater than zero.
    bins_per_pitch : int
        Second argument handed to ``mapper``.
    sample_func : callable
        Forwarded to ``chord_sampler`` as ``sample_func``.
    mapper : callable
        Called as ``mapper(stream, bins_per_pitch)`` to produce the result.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    samplers = []
    for key in stash.keys():
        samplers.append(
            pescador.Streamer(chord_sampler, key, stash, win_length,
                              sample_func=sample_func))

    muxed = pescador.mux(samplers, None, working_size, lam=25)
    if max_pitch_shift > 0:
        muxed = FX.pitch_shift_cqt(muxed, max_pitch_shift=max_pitch_shift)

    return mapper(muxed, bins_per_pitch)
Ejemplo n.º 14
0
 def setup_mux(self, streams, stream_weights):
     """Mux ``streams`` with the given pool weights and store the streamer.

     All streams are kept active (``k=len(streams)``); the wrapped result
     is stored on ``self.streamer``.
     """
     mux_options = dict(n_samples=None,
                        k=len(streams),
                        lam=None,
                        pool_weights=stream_weights)
     self.streamer = pescador.Streamer(pescador.mux(streams, **mux_options))
Ejemplo n.º 15
0
def create_fretboard_stream(stash, win_length, vocab, targets,
                            working_size=50,
                            sample_func=util.slice_cqt_entity):
    """Build an unconstrained sample stream mapped through the fretboard mapper.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    vocab : object
        Forwarded to ``futil.fretboard_mapper``.
    targets : object
        Forwarded to ``futil.fretboard_mapper``.
    working_size : int
        Number of open streams at a time.
    sample_func : callable
        Forwarded to ``cqt_sampler`` as ``sample_func``.

    Returns
    -------
    stream : generator
        Data stream of windowed cqt-fret entities.
    """
    samplers = []
    for key in stash.keys():
        samplers.append(
            pescador.Streamer(cqt_sampler, key, stash, win_length,
                              sample_func=sample_func))

    muxed = pescador.mux(samplers, None, working_size, lam=25)
    return futil.fretboard_mapper(muxed, vocab, targets)
Ejemplo n.º 16
0
def muxed_uniform_chord_stream(stash,
                               synth_stash,
                               win_length,
                               vocab_dim=157,
                               pitch_shift=0,
                               working_size=4):
    """Build a per-chord uniform sample stream drawing from two stashes.

    Every chord index in ``range(vocab_dim)`` gets samplers from ``stash``.
    Indexes in ``range(60, vocab_dim - 1)`` additionally get samplers from
    ``synth_stash``. Chord indexes with no samplers at all are skipped.

    Parameters
    ----------
    stash : object
        Primary entity collection.
    synth_stash : object
        Secondary entity collection, used only for chord indexes 60 up to
        (but excluding) ``vocab_dim - 1``.
    win_length : int
        Window length forwarded to ``chord_sampler``.
    vocab_dim : int
        Number of chord indexes considered; also ``k`` of the outer mux and
        forwarded to ``FX.map_to_chord_index``.
    pitch_shift : int
        When non-zero, passed as ``max_pitch_shift`` to
        ``FX.pitch_shift_cqt``.
    working_size : int
        ``k`` of each per-chord mux.

    Returns
    -------
    stream
        The result of ``FX.map_to_chord_index`` on the merged stream.
    """
    real_labels = util.partition(stash, chord_map)
    synth_labels = util.partition(synth_stash, chord_map)

    def _samplers(source, labels, index):
        # One sampler per entity key that contains the requested chord index.
        subindex = util.index_partition_arrays(labels, [index])
        return [
            pescador.Streamer(chord_sampler, key, source, win_length,
                              subindex)
            for key in subindex.keys()
        ]

    chord_pool = []
    for chord_idx in range(vocab_dim):
        samplers = _samplers(stash, real_labels, chord_idx)
        if 60 <= chord_idx < vocab_dim - 1:
            samplers.extend(_samplers(synth_stash, synth_labels, chord_idx))
        if not samplers:
            continue
        inner = pescador.mux(samplers,
                             n_samples=None,
                             k=working_size,
                             lam=20)
        chord_pool.append(pescador.Streamer(inner))

    merged = pescador.mux(chord_pool,
                          n_samples=None,
                          k=vocab_dim,
                          lam=None,
                          with_replacement=False)
    if pitch_shift:
        merged = FX.pitch_shift_cqt(merged, max_pitch_shift=pitch_shift)

    return FX.map_to_chord_index(merged, vocab_dim)
Ejemplo n.º 17
0
def neighbor_stream(neighbors, dataset, slice_func,
                    working_size=10, lam=25, with_meta=False,
                    **kwargs):
    """Endlessly yield (anchor, same-neighborhood, other-neighborhood) triples.

    Parameters
    ----------
    neighbors : dict of lists
        Map of neighborhood keys (names) to lists of related indexes.
        At least two neighborhoods are needed, since the contrasting sample
        is drawn from a key other than the anchor's.

    dataset : pd.DataFrame
        Dataset from which to sample; rows are looked up with ``.loc``.

    slice_func : callable
        Method for slicing observations from a npz archive.

    working_size : int, default=10
        Number of sample sources to keep alive.

    lam : number, default=25
        Sample refresh-rate parameter.

    with_meta : bool, default=False
        If True, yields a ``(data, metadata)`` tuple instead of only the
        data dict.

    kwargs : dict
        Keyword arguments to pass through to the slicing function.

    Yields
    ------
    data : dict
        Keys ``x_in``, ``x_same`` and ``x_diff``: the base observation, a
        second draw from the same neighborhood, and a draw from a different
        one.

    metadata : dict
        Keys ``y_in``, ``y_same`` and ``y_diff`` with the matching metadata;
        only yielded when ``with_meta`` is True.
    """
    streams = {}
    for key, indexes in neighbors.items():
        seed_pool = []
        for row_id in indexes:
            seed_pool.append(
                pescador.Streamer(slice_func, dataset.loc[row_id], **kwargs))
        streams[key] = pescador.mux(seed_pool, n_samples=None,
                                    k=working_size, lam=lam)

    while True:
        candidates = list(streams.keys())

        # Anchor and positive sample come from the same neighborhood.
        anchor = random.choice(candidates)
        x_in, y_in = next(streams[anchor])
        x_same, y_same = next(streams[anchor])

        # The negative sample comes from any other neighborhood.
        candidates.remove(anchor)
        contrast = random.choice(candidates)
        x_diff, y_diff = next(streams[contrast])

        data = dict(x_in=x_in, x_same=x_same, x_diff=x_diff)
        meta = dict(y_in=y_in, y_same=y_same, y_diff=y_diff)
        if with_meta:
            yield (data, meta)
        else:
            yield data
Ejemplo n.º 18
0
def neighbor_stream(neighbors, dataset, slice_func,
                    working_size=10, lam=25, with_meta=False,
                    **kwargs):
    """Endlessly yield (anchor, same-neighborhood, other-neighborhood) triples.

    Parameters
    ----------
    neighbors : dict of lists
        Map of neighborhood keys (names) to lists of related indexes.
        At least two neighborhoods are needed, since the contrasting sample
        is drawn from a key other than the anchor's.

    dataset : pd.DataFrame
        Dataset from which to sample; rows are looked up with ``.loc``.

    slice_func : callable
        Method for slicing observations from a npz archive.

    working_size : int, default=10
        Number of sample sources to keep alive.

    lam : number, default=25
        Sample refresh-rate parameter.

    with_meta : bool, default=False
        If True, yields a ``(data, metadata)`` tuple instead of only the
        data dict.

    kwargs : dict
        Keyword arguments to pass through to the slicing function.

    Yields
    ------
    data : dict
        Keys ``x_in``, ``x_same`` and ``x_diff``: the base observation, a
        second draw from the same neighborhood, and a draw from a different
        one.

    metadata : dict
        Keys ``y_in``, ``y_same`` and ``y_diff`` with the matching metadata;
        only yielded when ``with_meta`` is True.
    """
    streams = {}
    for key, indexes in neighbors.items():
        seed_pool = []
        for row_id in indexes:
            seed_pool.append(
                pescador.Streamer(slice_func, dataset.loc[row_id], **kwargs))
        streams[key] = pescador.mux(seed_pool, n_samples=None,
                                    k=working_size, lam=lam)

    while True:
        candidates = list(streams.keys())

        # Anchor and positive sample come from the same neighborhood.
        anchor = random.choice(candidates)
        x_in, y_in = next(streams[anchor])
        x_same, y_same = next(streams[anchor])

        # The negative sample comes from any other neighborhood.
        candidates.remove(anchor)
        contrast = random.choice(candidates)
        x_diff, y_diff = next(streams[contrast])

        data = dict(x_in=x_in, x_same=x_same, x_diff=x_diff)
        meta = dict(y_in=y_in, y_same=y_same, y_diff=y_diff)
        if with_meta:
            yield (data, meta)
        else:
            yield data
Ejemplo n.º 19
0
def create_chord_index_stream(stash,
                              win_length,
                              lexicon,
                              index_mapper=map_chord_labels,
                              sample_func=slice_cqt_entity,
                              pitch_shift_func=FX.pitch_shift_cqt,
                              max_pitch_shift=0,
                              working_size=50,
                              partition_labels=None,
                              valid_idx=None):
    """Build an unconstrained chord stream mapped to class indexes.

    A single index covering every entry of ``valid_idx`` is shared by one
    sampler per stash key; the samplers are muxed into one stream.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    lexicon : lexicon.Lexicon
        Instantiated chord lexicon for mapping labels to indices.
    index_mapper : callable
        Label-mapping function handed to ``util.partition`` and
        ``FX.map_to_class_index``.
    sample_func : callable
        Forwarded to ``chord_sampler`` as ``sample_func``.
    pitch_shift_func : callable
        Applied to the stream when ``max_pitch_shift > 0``.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation.
    working_size : int
        Number of open streams at a time.
    partition_labels : dict, optional
        Pre-computed partition; computed from ``stash`` when None.
    valid_idx : iterable of int, optional
        Class indexes to include; defaults to
        ``range(lexicon.num_classes)``.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    labels = partition_labels
    if labels is None:
        labels = util.partition(stash, index_mapper, lexicon)

    class_indexes = valid_idx
    if class_indexes is None:
        class_indexes = range(lexicon.num_classes)

    chord_index = util.index_partition_arrays(labels, class_indexes)
    samplers = []
    for key in stash.keys():
        samplers.append(
            pescador.Streamer(chord_sampler, key, stash, win_length,
                              chord_index, sample_func=sample_func))

    muxed = pescador.mux(samplers, None, working_size, lam=25)
    if max_pitch_shift > 0:
        muxed = pitch_shift_func(muxed, max_pitch_shift=max_pitch_shift)

    return FX.map_to_class_index(muxed, index_mapper, lexicon)
Ejemplo n.º 20
0
def compute_chord_averages(stash, win_length=20, num_obs=5000):
    """Estimate an average ``cqt`` observation per chord class.

    For each quality index in ``range(13)`` a mux is built over that
    quality's entities. For each of the 12 roots the mux is passed through
    ``FX.rotate_chord_to_root`` and ``num_obs`` samples are averaged.
    A final average is computed from partition index 13 without rotation
    (presumably the "no chord" class -- TODO confirm).

    Parameters
    ----------
    stash : object
        Entity collection handed to ``util.partition`` and ``chord_sampler``.
    win_length : int, default=20
        Window length forwarded to ``chord_sampler``. Earlier revisions
        ignored this argument and always used 20; the default keeps that
        behaviour for callers that do not pass it.
    num_obs : int, default=5000
        Number of samples drawn for each average.

    Returns
    -------
    np.ndarray
        Stack of ``13 * 12 + 1`` averaged observations, ordered by
        ``quality * 12 + root`` with the index-13 average last.
    """
    quality_partition = util.partition(stash, quality_map)

    def _build_pool(index):
        # One sampler per entity key present in the given sub-index.
        return [pescador.Streamer(chord_sampler, key, stash, win_length,
                                  index)
                for key in index]

    def _average(stream):
        # next() works for both Python 2 and Python 3 iterators.
        x_obs = np.array([next(stream).cqt for _ in range(num_obs)])
        return x_obs.mean(axis=0).squeeze()

    qual_indexes = [util.index_partition_arrays(quality_partition, [q])
                    for q in range(13)]
    qual_pools = [_build_pool(q_idx) for q_idx in qual_indexes]

    obs_aves = []
    for pool in qual_pools:
        base_stream = pescador.mux(pool, n_samples=None, k=50, lam=5)
        for root in range(12):
            stream = FX.rotate_chord_to_root(base_stream, root)
            obs_aves.append(_average(stream))
            # Progress indicator: number of averages computed so far.
            print(len(obs_aves))

    null_index = util.index_partition_arrays(quality_partition, [13])
    null_stream = pescador.mux(_build_pool(null_index),
                               n_samples=None, k=50, lam=5)
    obs_aves.append(_average(null_stream))
    return np.array(obs_aves)
Ejemplo n.º 21
0
def sample_chord_qualities(stash, output_dir, win_length=20, num_obs=10000):
    """Draw ``num_obs`` ``cqt`` samples per chord class and save them to disk.

    For every quality index in ``range(13)`` and every root in ``range(12)``
    the samples are written to ``<output_dir>/<chord_idx>.npy`` (zero-padded
    to three digits), where ``chord_idx = quality * 12 + root``. Samples from
    partition index 13 are written, unrotated, to ``156.npy``.

    Parameters
    ----------
    stash : object
        Entity collection handed to ``util.partition`` and ``chord_sampler``.
    output_dir : str
        Destination directory; created through ``futil.create_directory``.
    win_length : int, default=20
        Window length forwarded to ``chord_sampler``. Earlier revisions
        ignored this argument and always used 20; the default keeps that
        behaviour for callers that do not pass it.
    num_obs : int, default=10000
        Number of samples stored in each file.
    """
    quality_partition = util.partition(stash, quality_map)

    def _build_pool(index):
        # One sampler per entity key present in the given sub-index.
        return [pescador.Streamer(chord_sampler, key, stash, win_length,
                                  index)
                for key in index]

    def _collect(stream):
        # next() works for both Python 2 and Python 3 iterators.
        return np.array([next(stream).cqt for _ in range(num_obs)])

    qual_indexes = [util.index_partition_arrays(quality_partition, [q])
                    for q in range(13)]
    qual_pools = [_build_pool(q_idx) for q_idx in qual_indexes]

    futil.create_directory(output_dir)
    print("[%s] Starting loop" % time.asctime())
    for qual, pool in enumerate(qual_pools):
        base_stream = pescador.mux(pool, n_samples=None, k=50, lam=5)
        for root in range(12):
            stream = FX.rotate_chord_to_root(base_stream, root)
            chord_idx = qual*12 + root
            np.save(os.path.join(output_dir, "%03d.npy" % chord_idx),
                    _collect(stream))
            print("[%s] %3d" % (time.asctime(), chord_idx))

    null_index = util.index_partition_arrays(quality_partition, [13])
    null_stream = pescador.mux(_build_pool(null_index),
                               n_samples=None, k=50, lam=5)
    # 156 == 13 * 12: the slot following the last rotated chord index.
    np.save(os.path.join(output_dir, "156.npy"), _collect(null_stream))
Ejemplo n.º 22
0
def load_dataset(stash):
    """Collect normalized chroma observations for each of 13 qualities.

    For every quality index in ``range(13)`` the matching entities are
    muxed without replacement, passed through
    ``FX.rotate_chroma_to_root(stream, 0)`` and fully materialized.

    Parameters
    ----------
    stash : object
        Entity collection handed to ``util.partition`` and
        ``D.chroma_stepper``.

    Returns
    -------
    list
        One entry per quality: the output of ``util.normalize(obs, axis=1)``
        for that quality's observation array.
    """
    labels = util.partition(stash, D.quality_map)
    qual_obs = []
    for quality in range(13):
        qindex = util.index_partition_arrays(labels, [quality])
        steppers = []
        for key in qindex:
            steppers.append(
                pescador.Streamer(D.chroma_stepper, key, stash, qindex))
        muxed = pescador.mux(steppers, n_samples=None, k=50, lam=None,
                             with_replacement=False)
        # Exhaust the rotated stream; this relies on the stream being finite.
        rotated = FX.rotate_chroma_to_root(muxed, 0)
        obs = np.array(list(rotated))
        qual_obs.append(util.normalize(obs, axis=1))

    return qual_obs
Ejemplo n.º 23
0
def create_uniform_factored_stream(stash, win_length, partition_labels=None,
                                   working_size=50, vocab_dim=157,
                                   pitch_shift=True):
    """Build a chord-sample stream that draws evenly across chord qualities.

    One inner mux is built for each quality index in ``range(13)`` over the
    keys of that quality's sub-index. The inner muxes are wrapped as
    streamers and combined by an outer mux created with
    ``with_replacement=False``.

    Parameters
    ----------
    stash : object
        Entity collection handed to ``chord_sampler`` and, when no labels
        are supplied, to ``util.partition``.
    win_length : int
        Window length forwarded to ``chord_sampler``.
    partition_labels : dict, optional
        Pre-computed quality partition. When None it is computed as
        ``util.partition(stash, quality_map)``.
    working_size : int
        Value passed as ``k`` to the outer mux.
    vocab_dim : int
        Forwarded to ``FX.map_to_joint_index``.
    pitch_shift : bool
        When truthy the stream is routed through ``FX.pitch_shift_cqt``.

    Returns
    -------
    stream
        The result of ``FX.map_to_joint_index`` applied to the muxed stream.
    """
    labels = partition_labels
    if labels is None:
        labels = util.partition(stash, quality_map)

    quality_streamers = []
    for quality in range(13):
        subindex = util.index_partition_arrays(labels, [quality])
        samplers = []
        for key in subindex.keys():
            samplers.append(
                pescador.Streamer(chord_sampler, key, stash, win_length,
                                  subindex))
        inner = pescador.mux(samplers, n_samples=None, k=25, lam=20)
        quality_streamers.append(pescador.Streamer(inner))

    outer_options = dict(n_samples=None, k=working_size, lam=None,
                         with_replacement=False)
    merged = pescador.mux(quality_streamers, **outer_options)
    if pitch_shift:
        merged = FX.pitch_shift_cqt(merged)

    return FX.map_to_joint_index(merged, vocab_dim)
Ejemplo n.º 24
0
def class_stream(neighbors, dataset, working_size=20, lam=5, with_meta=False):
    """Endlessly yield samples labelled with a randomly chosen neighborhood key.

    Parameters
    ----------
    neighbors : dict of lists
        Map of neighborhood keys to lists of ``dataset`` row labels.
    dataset : object
        Indexed with ``.loc[idx]`` to fetch the row given to
        ``slice_embedding``.
    working_size : int, default=20
        ``k`` of each per-neighborhood mux.
    lam : number, default=5
        ``lam`` of each per-neighborhood mux.
    with_meta : bool, default=False
        If True, yields ``(data, meta)``; otherwise only ``data``.

    Yields
    ------
    data : dict
        ``x_in`` holds the sample and ``y`` a one-element array with the
        chosen neighborhood key.
    meta : object
        Second item produced by the sampled stream; only yielded when
        ``with_meta`` is True.
    """
    streams = {}
    for key, indexes in neighbors.items():
        seed_pool = []
        for row_id in indexes:
            seed_pool.append(
                pescador.Streamer(slice_embedding, dataset.loc[row_id]))
        streams[key] = pescador.mux(seed_pool, n_samples=None,
                                    k=working_size, lam=lam)

    while True:
        chosen = random.choice(list(streams.keys()))
        x_in, meta = next(streams[chosen])
        data = dict(x_in=x_in, y=np.array([chosen]))
        if with_meta:
            yield (data, meta)
        else:
            yield data
Ejemplo n.º 25
0
def class_stream(neighbors, dataset, working_size=20, lam=5, with_meta=False):
    """Endlessly yield samples labelled with a randomly chosen neighborhood key.

    Parameters
    ----------
    neighbors : dict of lists
        Map of neighborhood keys to lists of ``dataset`` row labels.
    dataset : object
        Indexed with ``.loc[idx]`` to fetch the row given to
        ``slice_embedding``.
    working_size : int, default=20
        ``k`` of each per-neighborhood mux.
    lam : number, default=5
        ``lam`` of each per-neighborhood mux.
    with_meta : bool, default=False
        If True, yields ``(data, meta)``; otherwise only ``data``.

    Yields
    ------
    data : dict
        ``x_in`` holds the sample and ``y`` a one-element array with the
        chosen neighborhood key.
    meta : object
        Second item produced by the sampled stream; only yielded when
        ``with_meta`` is True.
    """
    streams = {}
    for key, indexes in neighbors.items():
        seed_pool = []
        for row_id in indexes:
            seed_pool.append(
                pescador.Streamer(slice_embedding, dataset.loc[row_id]))
        streams[key] = pescador.mux(seed_pool, n_samples=None,
                                    k=working_size, lam=lam)

    while True:
        chosen = random.choice(list(streams.keys()))
        x_in, meta = next(streams[chosen])
        data = dict(x_in=x_in, y=np.array([chosen]))
        if with_meta:
            yield (data, meta)
        else:
            yield data
Ejemplo n.º 26
0
def create_chord_index_stream(stash, win_length, lexicon,
                              index_mapper=map_chord_labels,
                              sample_func=slice_cqt_entity,
                              pitch_shift_func=FX.pitch_shift_cqt,
                              max_pitch_shift=0, working_size=50,
                              partition_labels=None, valid_idx=None):
    """Return an unconstrained stream of chord samples with class indexes.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    lexicon : lexicon.Lexicon
        Instantiated chord lexicon for mapping labels to indices.
    index_mapper : callable
        Label-to-index mapper; used both to partition the stash and by
        `FX.map_to_class_index` on the final stream.
    sample_func : callable
        Forwarded to `chord_sampler` as its `sample_func` keyword.
    pitch_shift_func : callable
        Applied to the stream (with `max_pitch_shift`) when shifting is on.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation; the
        stream is left unshifted unless this is greater than zero.
    working_size : int
        Number of open streams at a time.
    partition_labels : dict
        Precomputed partition of the stash; computed with `util.partition`
        when None.
    valid_idx : iterable of int
        Class indexes to keep; defaults to every class in `lexicon`.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    labels = partition_labels
    if labels is None:
        labels = util.partition(stash, index_mapper, lexicon)

    classes = range(lexicon.num_classes) if valid_idx is None else valid_idx
    chord_index = util.index_partition_arrays(labels, classes)

    # One streamer per stash key, all sharing the same filtered index.
    entity_pool = []
    for key in stash.keys():
        entity_pool.append(
            pescador.Streamer(chord_sampler, key, stash, win_length,
                              chord_index, sample_func=sample_func))

    stream = pescador.mux(entity_pool, None, working_size, lam=25)
    if max_pitch_shift > 0:
        stream = pitch_shift_func(stream, max_pitch_shift=max_pitch_shift)

    return FX.map_to_class_index(stream, index_mapper, lexicon)
Ejemplo n.º 27
0
    def _instrument_mux(self, instrument):
        """Build the mux that draws from one instrument's streams.

        Parameters
        ----------
        instrument : str
            Instrument to select.

        Returns
        -------
        mux : pescador.mux or None
            A pescador.mux over the streams returned by
            `self._instrument_streams(instrument)`, or None when that
            collection is empty.
        """
        streams = self._instrument_streams(instrument)
        # Guard clause: nothing to multiplex for this instrument.
        if not len(streams):
            return None
        return pescador.mux(
            streams, n_samples=None, **self.instrument_mux_params)
Ejemplo n.º 28
0
def load_dataset(stash):
    """Collect normalized observations for each of the 13 quality indexes.

    The stash is partitioned with `D.quality_map`; for every quality index
    in ``range(13)`` the matching entities are streamed through
    `D.chroma_stepper`, passed through `FX.rotate_chroma_to_root(stream, 0)`
    and gathered into one array.

    Parameters
    ----------
    stash : biggie.Stash
        Collection of entities to draw observations from (type presumed
        from sibling functions -- confirm against the caller).

    Returns
    -------
    qual_obs : list
        One `util.normalize(obs, axis=1)` result per quality index, in
        index order.
    """
    partition_labels = util.partition(stash, D.quality_map)

    def _observations_for(quality):
        # Gather every observation available for a single quality index.
        qindex = util.index_partition_arrays(partition_labels, [quality])
        entity_pool = []
        for key in qindex:
            entity_pool.append(
                pescador.Streamer(D.chroma_stepper, key, stash, qindex))
        stream = pescador.mux(entity_pool, n_samples=None, k=50, lam=None,
                              with_replacement=False)
        obs = np.array(list(FX.rotate_chroma_to_root(stream, 0)))
        return util.normalize(obs, axis=1)

    return [_observations_for(q) for q in range(13)]
Ejemplo n.º 29
0
def create_target_stream(stash,
                         win_length,
                         working_size=50,
                         max_pitch_shift=0,
                         bins_per_pitch=1,
                         sample_func=slice_cqt_entity,
                         mapper=FX.map_to_chroma):
    """Return an unconstrained stream of chord samples passed through `mapper`.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    working_size : int
        Number of open streams at a time.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation; no
        shifting is applied unless this is greater than zero.
    bins_per_pitch : int
        Second positional argument handed to `mapper`.
    sample_func : callable
        Forwarded to `chord_sampler` as its `sample_func` keyword.
    mapper : callable
        Called as ``mapper(stream, bins_per_pitch)`` to produce the result.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    # One streamer per entity in the stash.
    entity_pool = []
    for key in stash.keys():
        entity_pool.append(
            pescador.Streamer(chord_sampler, key, stash, win_length,
                              sample_func=sample_func))

    stream = pescador.mux(entity_pool, None, working_size, lam=25)
    if max_pitch_shift > 0:
        stream = FX.pitch_shift_cqt(stream, max_pitch_shift=max_pitch_shift)

    return mapper(stream, bins_per_pitch)
Ejemplo n.º 30
0
    def _instrument_mux(self, instrument):
        """Return a pescador.mux for a single instrument.

        Parameters
        ----------
        instrument : str
            Instrument to select.

        Returns
        -------
        mux : pescador.mux or None
            Mux over `self._instrument_streams(instrument)`, configured
            with `self.instrument_mux_params`; None if there are no streams.
        """
        streams = self._instrument_streams(instrument)
        has_streams = len(streams) > 0
        return (pescador.mux(streams, n_samples=None,
                             **self.instrument_mux_params)
                if has_streams else None)
Ejemplo n.º 31
0
    def setup(self):
        """Perform the setup to prepare for streaming.

        Builds one mux per unique value of ``self.features_df["instrument"]``,
        combines the non-empty ones into a master mux, and stores both
        `self.master_stream` and `self.buffered_streamer`.
        """
        # One mux per instrument name; entries may be None.
        names = list(self.features_df["instrument"].unique())
        inst_muxes = [self._instrument_mux(name) for name in names]

        # Wrap every usable mux in a Streamer.
        mux_streams = []
        for inst_mux in inst_muxes:
            if inst_mux is not None:
                mux_streams.append(pescador.Streamer(inst_mux))

        # The master mux is wrapped in a Streamer so that the buffer
        # knows what to do with it.
        self.master_stream = pescador.Streamer(
            pescador.mux(mux_streams, **self.master_mux_params))

        # Pick the buffering front-end and build the final streamer.
        if self.use_zmq:
            make_buffer = zmq_buffered_stream
        else:
            make_buffer = buffer_stream
        self.buffered_streamer = make_buffer(self.master_stream,
                                             self.batch_size)
Ejemplo n.º 32
0
def chord_streamer(stash,
                   win_length,
                   partition_labels=None,
                   vocab_dim=157,
                   working_size=4,
                   valid_idx=None,
                   n_samples=5000,
                   batch_size=50):
    """Build one minibatch stream per chord index that has data.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities (type presumed from sibling
        functions -- confirm against the caller).
    win_length : int
        Forwarded to `D.chord_sampler`.
    partition_labels : dict
        Precomputed partition of the stash; computed with
        ``util.partition(stash, D.chord_map)`` when None.
    vocab_dim : int
        Size of the chord vocabulary; also the default range of indexes.
    working_size : int
        Forwarded to `pescador.mux` as `k`.
    valid_idx : iterable of int
        Chord indexes to open; defaults to ``range(vocab_dim)``.
    n_samples : int
        Forwarded to `pescador.mux` as `n_samples`.
    batch_size : int
        Forwarded to `S.minibatch` as `batch_size`.

    Returns
    -------
    chord_pool : list
        One `S.minibatch` stream per chord index with at least one entity.
    chord_idx : np.ndarray
        The chord indexes corresponding, in order, to `chord_pool`.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, D.chord_map)

    if valid_idx is None:
        valid_idx = range(vocab_dim)

    chord_pool = []
    chord_idx = []
    for idx in valid_idx:
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Opening %d ..." % idx)
        subindex = util.index_partition_arrays(partition_labels, [idx])
        entity_pool = [
            pescador.Streamer(D.chord_sampler, key, stash, win_length,
                              subindex) for key in subindex.keys()
        ]
        # Chord indexes without any entity are left out of both outputs.
        if not entity_pool:
            continue
        stream = pescador.mux(entity_pool,
                              n_samples=n_samples,
                              k=working_size,
                              lam=20)
        batch = S.minibatch(FX.map_to_chord_index(stream, vocab_dim),
                            batch_size=batch_size)
        chord_pool.append(batch)
        chord_idx.append(idx)
    print("Done!")
    return chord_pool, np.array(chord_idx)
Ejemplo n.º 33
0
def create_embedding_stream(dataset, working_size=100, lam=5, **kwargs):
    """Return a mux over `slice_embedding` streamers, one per dataset row.

    Parameters
    ----------
    dataset : object providing ``iterrows()``
        Each yielded row becomes the first argument of `slice_embedding`.
    working_size : int
        Forwarded to `pescador.mux` as `k`.
    lam : scalar
        Forwarded to `pescador.mux` as `lam`.
    **kwargs
        Extra keyword arguments forwarded to every `pescador.Streamer`.

    Returns
    -------
    stream : pescador.mux
        Mux over all row streamers, created with ``n_samples=None``.
    """
    seed_pool = []
    for _, row in dataset.iterrows():
        seed_pool.append(pescador.Streamer(slice_embedding, row, **kwargs))
    return pescador.mux(seed_pool, n_samples=None, k=working_size, lam=lam)
Ejemplo n.º 34
0
def create_contrastive_chord_stream(stash,
                                    win_length,
                                    valid_idx=None,
                                    partition_labels=None,
                                    working_size=2,
                                    vocab_dim=157,
                                    pitch_shift=0,
                                    neg_probs=None):
    """Return a stream of chord samples, with equal positive and negative
    examples.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities (type presumed from sibling
        functions -- confirm against the caller).
    win_length : int
        Forwarded to `chord_sampler`.
    valid_idx : iterable of int
        Chord indexes to open; defaults to ``range(vocab_dim)``. Indexes
        that are not listed are treated as having no data.
    partition_labels : dict
        Precomputed partition of the stash; computed with
        ``util.partition(stash, chord_map)`` when None.
    working_size : int
        Forwarded to the per-chord `pescador.mux` as `k`.
    vocab_dim : int
        Size of the chord vocabulary.
    pitch_shift : int
        Not used by this function.
    neg_probs : np.ndarray
        Row `i` weights the chords drawn as negatives for chord `i`.
        Defaults to uniform weights over all other chords. The array is
        read only; it is never modified.

    Returns
    -------
    stream : generator
        Result of `FX.unpack_contrastive_pairs` over the paired stream.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, chord_map)

    if valid_idx is None:
        valid_idx = range(vocab_dim)

    if neg_probs is None:
        neg_probs = np.ones([vocab_dim] * 2)
        neg_probs[np.eye(vocab_dim, dtype=bool)] = 0.0
        neg_probs = util.normalize(neg_probs, axis=1)

    # `izip` on Python 2; on Python 3 the builtin `zip` is already lazy.
    lazy_zip = getattr(itertools, "izip", zip)

    # Both arrays are addressed by absolute chord index, so they stay
    # aligned with `neg_probs` even when `valid_idx` is only a subset of
    # the vocabulary. Object arrays from `np.empty` start out as None.
    chord_streams = np.empty(vocab_dim, dtype=object)
    has_data = np.zeros(vocab_dim, dtype=bool)
    for chord_idx in valid_idx:
        subindex = util.index_partition_arrays(partition_labels, [chord_idx])
        entity_pool = [
            pescador.Streamer(chord_sampler, key, stash, win_length, subindex)
            for key in subindex.keys()
        ]
        if not entity_pool:
            continue
        chord_streams[chord_idx] = pescador.mux(entity_pool,
                                                n_samples=None,
                                                k=working_size,
                                                lam=20)
        has_data[chord_idx] = True

    binary_pool = []
    for chord_idx in range(vocab_dim):
        if chord_streams[chord_idx] is None:
            continue

        # Skip contrast streams with (a) no data or (b) no probability.
        # Work on a copy so the caller's `neg_probs` row is left untouched.
        not_chord_probs = np.array(neg_probs[chord_idx])
        not_chord_probs[chord_idx] = 0.0
        not_chord_probs *= has_data
        nidx = not_chord_probs > 0.0
        assert not_chord_probs.sum() > 0.0
        chord_pool = [pescador.Streamer(x) for x in chord_streams[nidx]]
        neg_stream = pescador.mux(chord_pool,
                                  n_samples=None,
                                  k=len(chord_pool),
                                  lam=None,
                                  with_replacement=False,
                                  pool_weights=not_chord_probs[nidx])
        # Pair every positive sample with one sample from another chord.
        pair_stream = lazy_zip(chord_streams[chord_idx], neg_stream)
        binary_pool.append(pescador.Streamer(pair_stream))

    cstream = pescador.mux(binary_pool,
                           n_samples=None,
                           k=len(binary_pool),
                           lam=None,
                           with_replacement=False)
    return FX.unpack_contrastive_pairs(cstream, vocab_dim)
Ejemplo n.º 35
0
 def test_restart_mux(self, mux_class):
     """Two successive runs of the same mux must yield equally many items."""
     streamers = [pescador.Streamer(chars) for chars in ('abc', 'def')]
     mux = mux_class(streamers, 2, rate=None, random_state=1234)
     first_run = list(mux(max_iter=100))
     second_run = list(mux(max_iter=100))
     assert len(first_run) == len(second_run)
Ejemplo n.º 36
0
    def test_mux_inf_loop(self, mux_class):
        """A mux over two empty streamers must terminate and yield nothing."""
        empty_streamers = [pescador.Streamer([]), pescador.Streamer([])]
        mux = mux_class(empty_streamers, 2, rate=None, random_state=1234)

        produced = list(mux(max_iter=100))
        assert len(produced) == 0
Ejemplo n.º 37
0
def create_pairwise_stream(stash, win_length, working_size=100, threshold=None,
                           sample_func=slice_cqt_entity):
    """Return a stream of samples, with equal positive and negative
    examples.

    Keys of `stash` are grouped by the text before their first underscore
    (the instrument code). Each group gets its own mux; triples are then
    formed from two consecutive samples of one group plus one sample drawn
    from the other groups.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    working_size : int
        Number of open streams at a time.
    threshold : scalar, default=None
        Threshold under which to suppress entities; only forwarded to
        `cqt_sampler` when it is not None.
    sample_func : callable
        Sampling function to apply to each entity.

    Returns
    -------
    stream : generator
        Data stream of windowed entities.
    """
    # `izip` on Python 2; on Python 3 the builtin `zip` is already lazy.
    lazy_zip = getattr(itertools, "izip", zip)

    args = dict(sample_func=sample_func)
    if threshold is not None:
        args.update(threshold=threshold)

    # Group keys by instrument code
    partitions = dict()
    for key in stash.keys():
        partitions.setdefault(key.split("_")[0], []).append(key)

    inst_streams = []
    for key_set in partitions.values():
        entity_pool = [pescador.Streamer(cqt_sampler, key, stash, win_length,
                                         **args)
                       for key in key_set]
        inst_streams.append(pescador.mux(entity_pool, n_samples=None,
                                         k=working_size, lam=1))

    triple_pool = []
    for idx, pos_stream in enumerate(inst_streams):
        # Negatives come from every instrument stream except this one.
        neg_pool = [pescador.Streamer(other)
                    for other_idx, other in enumerate(inst_streams)
                    if other_idx != idx]
        neg_stream = pescador.mux(neg_pool, n_samples=None,
                                  k=len(neg_pool), lam=None,
                                  with_replacement=False)
        # The same stream object is zipped with itself on purpose, so each
        # triple holds two successive samples of one instrument.
        triples = lazy_zip(pos_stream, pos_stream, neg_stream)
        triple_pool.append(pescador.Streamer(triples))

    cstream = pescador.mux(triple_pool, n_samples=None, k=len(triple_pool),
                           lam=None, with_replacement=False)

    return _unpack_triples(cstream)
Ejemplo n.º 38
0
 def setup_mux(self):
     """Create `self.streamer` from a mux spanning the whole seed pool.

     The mux is built with ``k`` equal to the size of `self.seed_pool` and
     the keyword arguments stored in `self.CLASS_MUX_PARAMS`.
     """
     pool_size = len(self.seed_pool)
     self.streamer = pescador.Streamer(
         pescador.mux(self.seed_pool, k=pool_size, **self.CLASS_MUX_PARAMS))
Ejemplo n.º 39
0
def create_embedding_stream(dataset, working_size=100, lam=5, **kwargs):
    """Multiplex `slice_embedding` streamers built from each dataset row.

    Parameters
    ----------
    dataset : object providing ``iterrows()``
        Every row it yields is passed to `slice_embedding`.
    working_size : int
        Given to `pescador.mux` as `k`.
    lam : scalar
        Given to `pescador.mux` as `lam`.
    **kwargs
        Additional keyword arguments for each `pescador.Streamer`.

    Returns
    -------
    stream : pescador.mux
        Mux over the row streamers, created with ``n_samples=None``.
    """
    rows = (row for _, row in dataset.iterrows())
    seed_pool = [pescador.Streamer(slice_embedding, row, **kwargs)
                 for row in rows]
    mux_options = dict(n_samples=None, k=working_size, lam=lam)
    return pescador.mux(seed_pool, **mux_options)
Ejemplo n.º 40
0
def create_uniform_chroma_stream(stash,
                                 win_length,
                                 lexicon,
                                 working_size=5,
                                 bins_per_pitch=1,
                                 max_pitch_shift=0,
                                 partition_labels=None,
                                 valid_idx=None):
    """Return a stream of chord samples mapped to chroma, built from one
    sub-stream per chord class.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    lexicon : lexicon.Lexicon
        Instantiated chord lexicon for mapping labels to indices.
    working_size : int
        Number of open streams at a time within each chord class.
    bins_per_pitch : int
        Second positional argument handed to `FX.map_to_chroma`.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation; no
        shifting is applied unless this is greater than zero.
    partition_labels : dict
        Precomputed partition of the stash; computed with `util.partition`
        when None.
    valid_idx : iterable of int
        Chord indexes to open; defaults to every class in `lexicon`.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    labels = partition_labels
    if labels is None:
        labels = util.partition(stash, map_chord_labels, lexicon)

    classes = range(lexicon.num_classes) if valid_idx is None else valid_idx

    chord_pool = []
    for chord_idx in classes:
        subindex = util.index_partition_arrays(labels, [chord_idx])
        entity_pool = [
            pescador.Streamer(chord_sampler, key, stash, win_length,
                              subindex, sample_func=slice_cqt_entity)
            for key in subindex.keys()
        ]
        # Only chord classes with at least one entity join the pool.
        if len(entity_pool) != 0:
            class_mux = pescador.mux(entity_pool, n_samples=None,
                                     k=working_size, lam=20)
            chord_pool.append(pescador.Streamer(class_mux))

    stream = pescador.mux(chord_pool, n_samples=None, k=lexicon.vocab_dim,
                          lam=None, with_replacement=False)

    if max_pitch_shift > 0:
        stream = FX.pitch_shift_cqt(stream, max_pitch_shift=max_pitch_shift)

    return FX.map_to_chroma(stream, bins_per_pitch)
Ejemplo n.º 41
0
    def test_mux_inf_loop(self, mux_class):
        """Iterating a mux of empty streamers must stop with zero items."""
        first = pescador.Streamer([])
        second = pescador.Streamer([])
        mux = mux_class([first, second], 2, rate=None, random_state=1234)

        n_items = len(list(mux(max_iter=100)))
        assert n_items == 0
Ejemplo n.º 42
0
 def test_restart_mux(self, mux_class):
     """Restarting a mux must produce as many items as its first run."""
     letters = pescador.Streamer('abc')
     more_letters = pescador.Streamer('def')
     mux = mux_class([letters, more_letters], 2, rate=None,
                     random_state=1234)
     n_first = len(list(mux(max_iter=100)))
     n_second = len(list(mux(max_iter=100)))
     assert n_first == n_second