def create_uniform_factored_stream(stash, win_length, partition_labels=None,
                                   working_size=50, vocab_dim=157,
                                   pitch_shift=True):
    """Return a stream of chord samples, with uniform quality presentation.

    One sub-stream is multiplexed for each quality label 0..12; those
    sub-streams are then multiplexed together without replacement,
    optionally pitch-shifted, and mapped to joint indexes.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `chord_sampler`.
    win_length : int
        Window length forwarded to `chord_sampler`.
    partition_labels : object, default=None
        Precomputed partition; when None it is built with
        `util.partition(stash, quality_map)`.
    working_size : int, default=50
        Passed as `k` to the outer (per-quality) mux.
    vocab_dim : int, default=157
        Forwarded to `FX.map_to_joint_index`.
    pitch_shift : bool, default=True
        If truthy, wrap the stream with `FX.pitch_shift_cqt`.

    Returns
    -------
    stream : object
        Whatever `FX.map_to_joint_index` returns for the muxed stream.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, quality_map)

    def quality_streamer(qual_idx):
        # One muxed stream over every entity carrying this quality label.
        subindex = util.index_partition_arrays(partition_labels, [qual_idx])
        samplers = [
            pescador.Streamer(chord_sampler, key, stash, win_length, subindex)
            for key in subindex.keys()
        ]
        muxed = pescador.mux(samplers, n_samples=None, k=25, lam=20)
        return pescador.Streamer(muxed)

    quality_pool = [quality_streamer(qual_idx) for qual_idx in range(13)]
    combined = pescador.mux(quality_pool, n_samples=None, k=working_size,
                            lam=None, with_replacement=False)
    if pitch_shift:
        combined = FX.pitch_shift_cqt(combined)
    return FX.map_to_joint_index(combined, vocab_dim)
def compute_chord_averages(stash, win_length=20, num_obs=5000):
    """Average `num_obs` sampled CQT observations for every chord class.

    For each quality label 0..12 a sample stream is multiplexed and rotated
    to each of the 12 roots; `num_obs` observations are drawn per
    (quality, root) pair and averaged. A final average is computed from the
    partition labelled 13, which is sampled without any root rotation.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `chord_sampler`.
    win_length : int, default=20
        Window length forwarded to `chord_sampler`. This argument used to
        be ignored in favour of a hard-coded 20; the default preserves the
        previous behaviour.
    num_obs : int, default=5000
        Number of observations drawn for each average.

    Returns
    -------
    averages : np.ndarray
        One averaged observation per class, ordered quality-major then
        root, with the label-13 class last (13 * 12 + 1 entries).
    """
    quality_partition = util.partition(stash, quality_map)

    def build_pool(label):
        # Streamers over every entity in the partition with this label.
        index = util.index_partition_arrays(quality_partition, [label])
        return [pescador.Streamer(chord_sampler, key, stash, win_length, index)
                for key in index]

    def mean_observation(stream):
        # Draw `num_obs` samples and collapse them to a single average.
        x_obs = np.array([next(stream).cqt for _ in range(num_obs)])
        return x_obs.mean(axis=0).squeeze()

    qual_pools = [build_pool(q) for q in range(13)]

    obs_aves = []
    for pool in qual_pools:
        # The same base stream is shared by all twelve root rotations.
        base_stream = pescador.mux(pool, n_samples=None, k=50, lam=5)
        for root in range(12):
            stream = FX.rotate_chord_to_root(base_stream, root)
            obs_aves.append(mean_observation(stream))
            # Progress indicator: number of classes finished so far.
            print(len(obs_aves))

    null_stream = pescador.mux(build_pool(13), n_samples=None, k=50, lam=5)
    obs_aves.append(mean_observation(null_stream))
    return np.array(obs_aves)
def sample_chord_qualities(stash, output_dir, win_length=20, num_obs=10000):
    """Draw `num_obs` CQT observations per chord class and save them as .npy.

    For each quality label 0..12 and each of the 12 roots, observations are
    sampled and written to ``<output_dir>/<qual * 12 + root, 3 digits>.npy``.
    Samples from the partition labelled 13 are written to ``156.npy``.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `chord_sampler`.
    output_dir : str
        Directory to write into; created via `futil.create_directory`.
    win_length : int, default=20
        Window length forwarded to `chord_sampler`. This argument used to
        be ignored in favour of a hard-coded 20; the default preserves the
        previous behaviour.
    num_obs : int, default=10000
        Number of observations saved per class.
    """
    quality_partition = util.partition(stash, quality_map)

    def build_pool(label):
        # Streamers over every entity in the partition with this label.
        index = util.index_partition_arrays(quality_partition, [label])
        return [pescador.Streamer(chord_sampler, key, stash, win_length, index)
                for key in index]

    def draw(stream):
        # Materialise `num_obs` observations from the stream.
        return np.array([next(stream).cqt for _ in range(num_obs)])

    qual_pools = [build_pool(q) for q in range(13)]

    futil.create_directory(output_dir)
    print("[%s] Starting loop" % time.asctime())
    for qual, pool in enumerate(qual_pools):
        # The same base stream is shared by all twelve root rotations.
        base_stream = pescador.mux(pool, n_samples=None, k=50, lam=5)
        for root in range(12):
            stream = FX.rotate_chord_to_root(base_stream, root)
            x_obs = draw(stream)
            chord_idx = qual * 12 + root
            np.save(os.path.join(output_dir, "%03d.npy" % chord_idx), x_obs)
            print("[%s] %3d" % (time.asctime(), chord_idx))

    null_stream = pescador.mux(build_pool(13), n_samples=None, k=50, lam=5)
    np.save(os.path.join(output_dir, "156.npy"), draw(null_stream))
def muxed_uniform_chord_stream(stash, synth_stash, win_length, vocab_dim=157,
                               pitch_shift=0, working_size=4):
    """Return a stream of chord samples, merging two separate datasets.

    Every chord index in ``range(vocab_dim)`` draws from `stash`; indexes in
    ``range(60, vocab_dim - 1)`` additionally draw from `synth_stash`.
    Chord indexes with no entities in either source are skipped.

    Parameters
    ----------
    stash, synth_stash : object
        Entity collections handed to `util.partition` and `chord_sampler`.
    win_length : int
        Window length forwarded to `chord_sampler`.
    vocab_dim : int, default=157
        Number of chord indexes; also used as `k` for the outer mux and
        forwarded to `FX.map_to_chord_index`.
    pitch_shift : int, default=0
        If truthy, passed as `max_pitch_shift` to `FX.pitch_shift_cqt`.
    working_size : int, default=4
        Passed as `k` to each per-chord mux.

    Returns
    -------
    stream : object
        Whatever `FX.map_to_chord_index` returns for the muxed stream.
    """
    primary_labels = util.partition(stash, chord_map)
    synth_labels = util.partition(synth_stash, chord_map)
    synth_indexes = range(60, vocab_dim - 1)

    def samplers_for(source, labels, chord_idx):
        # Streamers for every entity in `source` carrying `chord_idx`.
        subindex = util.index_partition_arrays(labels, [chord_idx])
        return [pescador.Streamer(chord_sampler, key, source, win_length,
                                  subindex)
                for key in subindex.keys()]

    chord_pool = []
    for chord_idx in range(vocab_dim):
        entity_pool = samplers_for(stash, primary_labels, chord_idx)
        if chord_idx in synth_indexes:
            entity_pool.extend(
                samplers_for(synth_stash, synth_labels, chord_idx))
        if not entity_pool:
            continue
        per_chord = pescador.mux(
            entity_pool, n_samples=None, k=working_size, lam=20)
        chord_pool.append(pescador.Streamer(per_chord))

    merged = pescador.mux(chord_pool, n_samples=None, k=vocab_dim, lam=None,
                          with_replacement=False)
    if pitch_shift:
        merged = FX.pitch_shift_cqt(merged, max_pitch_shift=pitch_shift)
    return FX.map_to_chord_index(merged, vocab_dim)
def learn_codebook(collection, n_codewords, working_size, max_iter, n_samples,
                   buffer_size):
    """Learn the feature transformation.

    Two passes are made over the collection's tracks: the first fits a
    `Whitening` transformer, the second fits a `VectorQuantizer` codebook
    on features that are whitened by that transformer.

    Parameters
    ----------
    collection : object
        Collection identifier handed to `seymour.get_collection_tracks`.
    n_codewords : int
        Passed as `n_clusters` to `HartiganOnline`.
    working_size : int
        Third positional argument to `pescador.mux`.
    max_iter : int
        Second positional argument to `pescador.mux`.
    n_samples : number
        Passed as `lam` to `pescador.mux`.
    buffer_size : int
        Passed as `batch_size` to `pescador.StreamLearner`.

    Returns
    -------
    transformer, encoder : pescador.StreamLearner
        The fitted whitening learner and the fitted codebook learner.
    """
    tracks = seymour.get_collection_tracks(collection)
    print('Learning from collection [%s], %d tracks' % (collection,
                                                         len(tracks)))

    # Pass 1: raw features -> whitening transform.
    print('Learning the feature scaling... ')
    raw_seeds = [pescador.Streamer(feature_stream, track) for track in tracks]
    raw_mux = pescador.mux(raw_seeds, max_iter, working_size, lam=n_samples)
    transformer = pescador.StreamLearner(Whitening(), batch_size=buffer_size)
    transformer.iter_fit(raw_mux)

    # Pass 2: whitened features -> codebook.
    # This could also be done with a sklearn.pipeline, probably?
    print('Learning the codebook... ')
    white_seeds = [
        pescador.Streamer(feature_stream, track, transform=transformer)
        for track in tracks
    ]
    white_mux = pescador.mux(white_seeds, max_iter, working_size,
                             lam=n_samples)
    quantizer = VectorQuantizer(
        clusterer=HartiganOnline(n_clusters=n_codewords))
    encoder = pescador.StreamLearner(quantizer, batch_size=buffer_size)
    encoder.iter_fit(white_mux)

    return transformer, encoder
def create_contrastive_chord_stream(stash, win_length, valid_idx=None,
                                    partition_labels=None, working_size=2,
                                    vocab_dim=157, pitch_shift=0,
                                    neg_probs=None):
    """Return a stream of chord samples, with equal positive and negative
    examples.

    For every chord index that has data, a positive stream is zipped with a
    negative stream drawn from the other chord indexes, weighted by the
    matching row of `neg_probs`.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `chord_sampler`.
    win_length : int
        Window length forwarded to `chord_sampler`.
    valid_idx : iterable of int, default=None
        Chord indexes to open; defaults to ``range(vocab_dim)``. Indexes
        not listed are treated as having no data.
    partition_labels : object, default=None
        Precomputed partition; built from `chord_map` when None.
    working_size : int, default=2
        Passed as `k` to each per-chord mux.
    vocab_dim : int, default=157
        Size of the chord vocabulary.
    pitch_shift : int, default=0
        Accepted for interface compatibility; not used by this function.
    neg_probs : np.ndarray, default=None
        ``(vocab_dim, vocab_dim)`` matrix whose row `i` weights the negative
        classes for chord `i`. Defaults to uniform off-diagonal weights.
        The caller's array is not modified.

    Returns
    -------
    stream : object
        Whatever `FX.unpack_contrastive_pairs` returns for the pair stream.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, chord_map)

    if valid_idx is None:
        valid_idx = range(vocab_dim)

    if neg_probs is None:
        neg_probs = np.ones([vocab_dim] * 2)
        neg_probs[np.eye(vocab_dim, dtype=bool)] = 0.0
        neg_probs = util.normalize(neg_probs, axis=1)

    # One slot per vocabulary index, so lookups below stay aligned even when
    # `valid_idx` is a subset. Object arrays from np.empty start as None.
    chord_streams = np.empty(vocab_dim, dtype=object)
    has_data = np.zeros(vocab_dim, dtype=bool)
    for chord_idx in valid_idx:
        subindex = util.index_partition_arrays(partition_labels, [chord_idx])
        entity_pool = [pescador.Streamer(chord_sampler, key, stash,
                                         win_length, subindex)
                       for key in subindex.keys()]
        if not entity_pool:
            continue
        chord_streams[chord_idx] = pescador.mux(
            entity_pool, n_samples=None, k=working_size, lam=20)
        has_data[chord_idx] = True

    binary_pool = []
    for chord_idx in range(vocab_dim):
        if chord_streams[chord_idx] is None:
            continue

        # Work on a copy: the in-place edits below must not leak into the
        # caller's `neg_probs`.
        not_chord_probs = np.array(neg_probs[chord_idx], dtype=float)
        # Drop contrast classes with (a) no data or (b) no probability.
        not_chord_probs[chord_idx] = 0.0
        not_chord_probs *= has_data
        nidx = not_chord_probs > 0.0
        assert not_chord_probs.sum() > 0.0
        chord_pool = [pescador.Streamer(x) for x in chord_streams[nidx]]
        neg_stream = pescador.mux(chord_pool, n_samples=None,
                                  k=len(chord_pool), lam=None,
                                  with_replacement=False,
                                  pool_weights=not_chord_probs[nidx])
        pair_stream = itertools.izip(chord_streams[chord_idx], neg_stream)
        binary_pool.append(pescador.Streamer(pair_stream))

    cstream = pescador.mux(binary_pool, n_samples=None, k=len(binary_pool),
                           lam=None, with_replacement=False)
    return FX.unpack_contrastive_pairs(cstream, vocab_dim)
def test_restart_mux():
    """Iterating the same Mux twice should yield the same number of items."""
    streamers = [pescador.Streamer(chars) for chars in ('abc', 'def')]
    mux = pescador.Mux(streamers, k=2, rate=None, revive=True,
                       with_replacement=False, random_state=1234)
    first_pass = list(mux(max_iter=100))
    second_pass = list(mux(max_iter=100))
    assert len(first_pass) == len(second_pass)
def create_uniform_chord_index_stream(stash, win_length, lexicon,
                                      index_mapper=map_chord_labels,
                                      sample_func=slice_cqt_entity,
                                      pitch_shift_func=FX.pitch_shift_cqt,
                                      max_pitch_shift=0, working_size=4,
                                      partition_labels=None, valid_idx=None):
    """Return a stream of chord samples, with uniform quality presentation.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    lexicon : lexicon.Lexicon
        Instantiated chord lexicon for mapping labels to indices.
    index_mapper : callable
        Label-to-index mapper, used for partitioning and for the final
        `FX.map_to_class_index` step.
    sample_func : callable
        Sampling function forwarded to `chord_sampler`.
    pitch_shift_func : callable
        Applied to the stream when `max_pitch_shift` is positive.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation.
    working_size : int
        Number of open streams at a time, per chord index.
    partition_labels : dict
        Precomputed partition; built from the stash when None.
    valid_idx : iterable of int
        Chord indexes to open; defaults to ``range(lexicon.num_classes)``.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, index_mapper, lexicon)
    if valid_idx is None:
        valid_idx = range(lexicon.num_classes)

    chord_pool = []
    for chord_idx in valid_idx:
        subindex = util.index_partition_arrays(partition_labels, [chord_idx])
        samplers = [
            pescador.Streamer(chord_sampler, key, stash, win_length, subindex,
                              sample_func=sample_func)
            for key in subindex.keys()
        ]
        # Chord indexes without any entities contribute nothing.
        if not samplers:
            continue
        per_chord = pescador.mux(
            samplers, n_samples=None, k=working_size, lam=20)
        chord_pool.append(pescador.Streamer(per_chord))

    combined = pescador.mux(chord_pool, n_samples=None, k=lexicon.vocab_dim,
                            lam=None, with_replacement=False)
    if max_pitch_shift > 0:
        combined = pitch_shift_func(combined, max_pitch_shift=max_pitch_shift)
    return FX.map_to_class_index(combined, index_mapper, lexicon)
def create_fretboard_stream(stash, win_length, vocab, targets,
                            working_size=50,
                            sample_func=util.slice_cqt_entity):
    """Return an unconstrained stream of chord samples with class indexes.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    vocab, targets : object
        Forwarded unchanged to `futil.fretboard_mapper`.
    working_size : int
        Number of open streams at a time.
    sample_func : callable
        Sampling function forwarded to `cqt_sampler`.

    Returns
    -------
    stream : generator
        Data stream of windowed cqt-fret entities.
    """
    samplers = []
    for key in stash.keys():
        samplers.append(pescador.Streamer(cqt_sampler, key, stash, win_length,
                                          sample_func=sample_func))
    muxed = pescador.mux(samplers, None, working_size, lam=25)
    return futil.fretboard_mapper(muxed, vocab, targets)
def setup(self):
    """Perform the setup to prepare for streaming.

    Builds one mux per unique instrument in `self.features_df`, combines the
    non-empty ones into a master mux, and stores the results on
    `self.master_stream` and `self.buffered_streamer`.
    """
    instrument_names = list(self.features_df["instrument"].unique())

    # `_instrument_mux` returns None for instruments without streams.
    per_instrument = [self._instrument_mux(name) for name in instrument_names]
    mux_streams = [pescador.Streamer(m) for m in per_instrument
                   if m is not None]

    # The master mux is wrapped in a Streamer so that the buffer knows what
    # to do with it.
    master_mux = pescador.mux(mux_streams, **self.master_mux_params)
    self.master_stream = pescador.Streamer(master_mux)

    make_buffer = zmq_buffered_stream if self.use_zmq else buffer_stream
    self.buffered_streamer = make_buffer(self.master_stream, self.batch_size)
def chord_streamer(stash, win_length, partition_labels=None, vocab_dim=157,
                   working_size=4, valid_idx=None, n_samples=5000,
                   batch_size=50):
    """Build one minibatch stream per chord index that has data.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `D.chord_sampler`.
    win_length : int
        Window length forwarded to `D.chord_sampler`.
    partition_labels : object, default=None
        Precomputed partition; built from `D.chord_map` when None.
    vocab_dim : int, default=157
        Forwarded to `FX.map_to_chord_index`; also the default index range.
    working_size : int, default=4
        Passed as `k` to each per-chord mux.
    valid_idx : iterable of int, default=None
        Chord indexes to open; defaults to ``range(vocab_dim)``.
    n_samples : int, default=5000
        Passed as `n_samples` to each per-chord mux.
    batch_size : int, default=50
        Passed to `S.minibatch`.

    Returns
    -------
    chord_pool : list
        One `S.minibatch` result per chord index that had entities.
    chord_idx : np.ndarray
        The chord indexes matching `chord_pool`, in the same order.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, D.chord_map)
    if valid_idx is None:
        valid_idx = range(vocab_dim)

    batches = []
    opened = []
    for idx in valid_idx:
        print("Opening %d ..." % idx)
        subindex = util.index_partition_arrays(partition_labels, [idx])
        samplers = [
            pescador.Streamer(D.chord_sampler, key, stash, win_length,
                              subindex)
            for key in subindex.keys()
        ]
        if not samplers:
            continue
        muxed = pescador.mux(
            samplers, n_samples=n_samples, k=working_size, lam=20)
        indexed = FX.map_to_chord_index(muxed, vocab_dim)
        batches.append(S.minibatch(indexed, batch_size=batch_size))
        opened.append(idx)
    print("Done!")
    return batches, np.array(opened)
def create_labeled_stream(stash, win_length, working_size=5000,
                          threshold=None, sample_func=slice_cqt_entity):
    """Return an unconstrained stream of samples with class labels.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    working_size : int
        Number of open streams at a time.
    threshold : scalar, default=None
        Threshold under which to suppress entities.
    sample_func : callable
        Sampling function to apply to each entity.

    Returns
    -------
    stream : generator
        Data stream of windowed entities.
    """
    sampler_kwargs = {"sample_func": sample_func, "threshold": threshold}
    samplers = []
    for key in stash.keys():
        samplers.append(pescador.Streamer(cqt_sampler, key, stash, win_length,
                                          **sampler_kwargs))
    return pescador.mux(samplers, None, working_size, lam=25)
def create_target_stream(stash, win_length, working_size=50,
                         max_pitch_shift=0, bins_per_pitch=1,
                         sample_func=slice_cqt_entity,
                         mapper=FX.map_to_chroma):
    """Return an unconstrained stream of chord samples, mapped to targets.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    working_size : int
        Number of open streams at a time.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation.
    bins_per_pitch : int
        Second argument passed to `mapper`.
    sample_func : callable
        Sampling function forwarded to `chord_sampler`.
    mapper : callable
        Called as ``mapper(stream, bins_per_pitch)`` to produce the result.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    samplers = []
    for key in stash.keys():
        samplers.append(pescador.Streamer(chord_sampler, key, stash,
                                          win_length,
                                          sample_func=sample_func))
    muxed = pescador.mux(samplers, None, working_size, lam=25)
    if max_pitch_shift > 0:
        muxed = FX.pitch_shift_cqt(muxed, max_pitch_shift=max_pitch_shift)
    return mapper(muxed, bins_per_pitch)
def setup_mux(self, streams, stream_weights):
    """Mux `streams` with the given weights and store it on `self.streamer`.

    All streams are kept active at once (``k=len(streams)``), and
    `stream_weights` is passed to `pescador.mux` as `pool_weights`.
    """
    mux_kwargs = dict(n_samples=None, k=len(streams), lam=None,
                      pool_weights=stream_weights)
    self.streamer = pescador.Streamer(pescador.mux(streams, **mux_kwargs))
def create_fretboard_stream(stash, win_length, vocab, targets,
                            working_size=50,
                            sample_func=util.slice_cqt_entity):
    """Return an unconstrained stream of chord samples with class indexes.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    vocab, targets : object
        Forwarded unchanged to `futil.fretboard_mapper`.
    working_size : int
        Number of open streams at a time.
    sample_func : callable
        Sampling function forwarded to `cqt_sampler`.

    Returns
    -------
    stream : generator
        Data stream of windowed cqt-fret entities.
    """
    samplers = []
    for key in stash.keys():
        samplers.append(pescador.Streamer(cqt_sampler, key, stash, win_length,
                                          sample_func=sample_func))
    muxed = pescador.mux(samplers, None, working_size, lam=25)
    return futil.fretboard_mapper(muxed, vocab, targets)
def muxed_uniform_chord_stream(stash, synth_stash, win_length, vocab_dim=157,
                               pitch_shift=0, working_size=4):
    """Return a stream of chord samples, merging two separate datasets.

    Every chord index in ``range(vocab_dim)`` draws from `stash`; indexes in
    ``range(60, vocab_dim - 1)`` additionally draw from `synth_stash`.
    Chord indexes with no entities in either source are skipped.

    Parameters
    ----------
    stash, synth_stash : object
        Entity collections handed to `util.partition` and `chord_sampler`.
    win_length : int
        Window length forwarded to `chord_sampler`.
    vocab_dim : int, default=157
        Number of chord indexes; also used as `k` for the outer mux and
        forwarded to `FX.map_to_chord_index`.
    pitch_shift : int, default=0
        If truthy, passed as `max_pitch_shift` to `FX.pitch_shift_cqt`.
    working_size : int, default=4
        Passed as `k` to each per-chord mux.

    Returns
    -------
    stream : object
        Whatever `FX.map_to_chord_index` returns for the muxed stream.
    """
    primary_labels = util.partition(stash, chord_map)
    synth_labels = util.partition(synth_stash, chord_map)
    synth_indexes = range(60, vocab_dim - 1)

    def samplers_for(source, labels, chord_idx):
        # Streamers for every entity in `source` carrying `chord_idx`.
        subindex = util.index_partition_arrays(labels, [chord_idx])
        return [pescador.Streamer(chord_sampler, key, source, win_length,
                                  subindex)
                for key in subindex.keys()]

    chord_pool = []
    for chord_idx in range(vocab_dim):
        entity_pool = samplers_for(stash, primary_labels, chord_idx)
        if chord_idx in synth_indexes:
            entity_pool.extend(
                samplers_for(synth_stash, synth_labels, chord_idx))
        if not entity_pool:
            continue
        per_chord = pescador.mux(
            entity_pool, n_samples=None, k=working_size, lam=20)
        chord_pool.append(pescador.Streamer(per_chord))

    merged = pescador.mux(chord_pool, n_samples=None, k=vocab_dim, lam=None,
                          with_replacement=False)
    if pitch_shift:
        merged = FX.pitch_shift_cqt(merged, max_pitch_shift=pitch_shift)
    return FX.map_to_chord_index(merged, vocab_dim)
def neighbor_stream(neighbors, dataset, slice_func, working_size=10, lam=25,
                    with_meta=False, **kwargs):
    """Produce a sample stream of positive and negative examples.

    Parameters
    ----------
    neighbors : dict of lists
        Map of neighborhood keys (names) to lists of related indexes.
    dataset : pd.DataFrame
        Dataset from which to sample.
    slice_func : callable
        Method for slicing observations from a npz archive.
    working_size : int, default=10
        Number of sample sources to keep alive.
    lam : number, default=25
        Sample refresh-rate parameter.
    with_meta : bool, default=False
        If True, yields a tuple of (X, Y) data.
    kwargs : dict
        Keyword arguments to pass through to the slicing function.

    Yields
    ------
    x_in, x_same, x_diff : np.ndarrays
        Tensors corresponding to the base observation, a similar
        datapoint, and a different one.
    y_in, y_same, y_diff : dicts
        Metadata corresponding to the samples 'x' data.
    """
    streams = {}
    for key, indexes in neighbors.items():
        seeds = [pescador.Streamer(slice_func, dataset.loc[i], **kwargs)
                 for i in indexes]
        streams[key] = pescador.mux(seeds, n_samples=None, k=working_size,
                                    lam=lam)

    # `streams` is not modified once built, so its key list can be reused.
    all_keys = list(streams.keys())
    while True:
        # Anchor and positive come from the same neighborhood.
        same_key = random.choice(all_keys)
        x_in, y_in = next(streams[same_key])
        x_same, y_same = next(streams[same_key])

        # The negative comes from any other neighborhood.
        other_keys = list(all_keys)
        other_keys.remove(same_key)
        diff_key = random.choice(other_keys)
        x_diff, y_diff = next(streams[diff_key])

        data = dict(x_in=x_in, x_same=x_same, x_diff=x_diff)
        meta = dict(y_in=y_in, y_same=y_same, y_diff=y_diff)
        if with_meta:
            yield (data, meta)
        else:
            yield data
def create_chord_index_stream(stash, win_length, lexicon,
                              index_mapper=map_chord_labels,
                              sample_func=slice_cqt_entity,
                              pitch_shift_func=FX.pitch_shift_cqt,
                              max_pitch_shift=0, working_size=50,
                              partition_labels=None, valid_idx=None):
    """Return an unconstrained stream of chord samples with class indexes.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    lexicon : lexicon.Lexicon
        Instantiated chord lexicon for mapping labels to indices.
    index_mapper : callable
        Label-to-index mapper, used for partitioning and for the final
        `FX.map_to_class_index` step.
    sample_func : callable
        Sampling function forwarded to `chord_sampler`.
    pitch_shift_func : callable
        Applied to the stream when `max_pitch_shift` is positive.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation.
    working_size : int
        Number of open streams at a time.
    partition_labels : dict
        Precomputed partition; built from the stash when None.
    valid_idx : iterable of int
        Chord indexes to sample; defaults to ``range(lexicon.num_classes)``.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, index_mapper, lexicon)
    if valid_idx is None:
        valid_idx = range(lexicon.num_classes)

    # A single index covering every valid chord class is shared by all
    # samplers.
    chord_index = util.index_partition_arrays(partition_labels, valid_idx)
    samplers = []
    for key in stash.keys():
        samplers.append(pescador.Streamer(chord_sampler, key, stash,
                                          win_length, chord_index,
                                          sample_func=sample_func))

    muxed = pescador.mux(samplers, None, working_size, lam=25)
    if max_pitch_shift > 0:
        muxed = pitch_shift_func(muxed, max_pitch_shift=max_pitch_shift)
    return FX.map_to_class_index(muxed, index_mapper, lexicon)
def compute_chord_averages(stash, win_length=20, num_obs=5000):
    """Average `num_obs` sampled CQT observations for every chord class.

    For each quality label 0..12 a sample stream is multiplexed and rotated
    to each of the 12 roots; `num_obs` observations are drawn per
    (quality, root) pair and averaged. A final average is computed from the
    partition labelled 13, which is sampled without any root rotation.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `chord_sampler`.
    win_length : int, default=20
        Window length forwarded to `chord_sampler`. This argument used to
        be ignored in favour of a hard-coded 20; the default preserves the
        previous behaviour.
    num_obs : int, default=5000
        Number of observations drawn for each average.

    Returns
    -------
    averages : np.ndarray
        One averaged observation per class, ordered quality-major then
        root, with the label-13 class last (13 * 12 + 1 entries).
    """
    quality_partition = util.partition(stash, quality_map)

    def build_pool(label):
        # Streamers over every entity in the partition with this label.
        index = util.index_partition_arrays(quality_partition, [label])
        return [pescador.Streamer(chord_sampler, key, stash, win_length, index)
                for key in index]

    def mean_observation(stream):
        # Draw `num_obs` samples and collapse them to a single average.
        x_obs = np.array([next(stream).cqt for _ in range(num_obs)])
        return x_obs.mean(axis=0).squeeze()

    qual_pools = [build_pool(q) for q in range(13)]

    obs_aves = []
    for pool in qual_pools:
        # The same base stream is shared by all twelve root rotations.
        base_stream = pescador.mux(pool, n_samples=None, k=50, lam=5)
        for root in range(12):
            stream = FX.rotate_chord_to_root(base_stream, root)
            obs_aves.append(mean_observation(stream))
            # Progress indicator: number of classes finished so far.
            print(len(obs_aves))

    null_stream = pescador.mux(build_pool(13), n_samples=None, k=50, lam=5)
    obs_aves.append(mean_observation(null_stream))
    return np.array(obs_aves)
def sample_chord_qualities(stash, output_dir, win_length=20, num_obs=10000):
    """Draw `num_obs` CQT observations per chord class and save them as .npy.

    For each quality label 0..12 and each of the 12 roots, observations are
    sampled and written to ``<output_dir>/<qual * 12 + root, 3 digits>.npy``.
    Samples from the partition labelled 13 are written to ``156.npy``.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `chord_sampler`.
    output_dir : str
        Directory to write into; created via `futil.create_directory`.
    win_length : int, default=20
        Window length forwarded to `chord_sampler`. This argument used to
        be ignored in favour of a hard-coded 20; the default preserves the
        previous behaviour.
    num_obs : int, default=10000
        Number of observations saved per class.
    """
    quality_partition = util.partition(stash, quality_map)

    def build_pool(label):
        # Streamers over every entity in the partition with this label.
        index = util.index_partition_arrays(quality_partition, [label])
        return [pescador.Streamer(chord_sampler, key, stash, win_length, index)
                for key in index]

    def draw(stream):
        # Materialise `num_obs` observations from the stream.
        return np.array([next(stream).cqt for _ in range(num_obs)])

    qual_pools = [build_pool(q) for q in range(13)]

    futil.create_directory(output_dir)
    print("[%s] Starting loop" % time.asctime())
    for qual, pool in enumerate(qual_pools):
        # The same base stream is shared by all twelve root rotations.
        base_stream = pescador.mux(pool, n_samples=None, k=50, lam=5)
        for root in range(12):
            stream = FX.rotate_chord_to_root(base_stream, root)
            x_obs = draw(stream)
            chord_idx = qual * 12 + root
            np.save(os.path.join(output_dir, "%03d.npy" % chord_idx), x_obs)
            print("[%s] %3d" % (time.asctime(), chord_idx))

    null_stream = pescador.mux(build_pool(13), n_samples=None, k=50, lam=5)
    np.save(os.path.join(output_dir, "156.npy"), draw(null_stream))
def load_dataset(stash):
    """Collect normalized observations for each quality label 0..12.

    For every quality label, all matching entities are streamed through
    `D.chroma_stepper`, muxed without replacement, rotated to root 0 with
    `FX.rotate_chroma_to_root`, and normalized along axis 1.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `D.chroma_stepper`.

    Returns
    -------
    qual_obs : list of np.ndarray
        One normalized observation array per quality label, in label order.
    """
    partition_labels = util.partition(stash, D.quality_map)

    def observations_for(quality):
        qindex = util.index_partition_arrays(partition_labels, [quality])
        steppers = [pescador.Streamer(D.chroma_stepper, key, stash, qindex)
                    for key in qindex]
        muxed = pescador.mux(steppers, n_samples=None, k=50, lam=None,
                             with_replacement=False)
        # The rotated stream is consumed completely here.
        obs = np.array(list(FX.rotate_chroma_to_root(muxed, 0)))
        return util.normalize(obs, axis=1)

    return [observations_for(q) for q in range(13)]
def create_uniform_factored_stream(stash, win_length, partition_labels=None,
                                   working_size=50, vocab_dim=157,
                                   pitch_shift=True):
    """Return a stream of chord samples, with uniform quality presentation.

    One sub-stream is multiplexed for each quality label 0..12; those
    sub-streams are then multiplexed together without replacement,
    optionally pitch-shifted, and mapped to joint indexes.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `chord_sampler`.
    win_length : int
        Window length forwarded to `chord_sampler`.
    partition_labels : object, default=None
        Precomputed partition; when None it is built with
        `util.partition(stash, quality_map)`.
    working_size : int, default=50
        Passed as `k` to the outer (per-quality) mux.
    vocab_dim : int, default=157
        Forwarded to `FX.map_to_joint_index`.
    pitch_shift : bool, default=True
        If truthy, wrap the stream with `FX.pitch_shift_cqt`.

    Returns
    -------
    stream : object
        Whatever `FX.map_to_joint_index` returns for the muxed stream.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, quality_map)

    def quality_streamer(qual_idx):
        # One muxed stream over every entity carrying this quality label.
        subindex = util.index_partition_arrays(partition_labels, [qual_idx])
        samplers = [
            pescador.Streamer(chord_sampler, key, stash, win_length, subindex)
            for key in subindex.keys()
        ]
        muxed = pescador.mux(samplers, n_samples=None, k=25, lam=20)
        return pescador.Streamer(muxed)

    quality_pool = [quality_streamer(qual_idx) for qual_idx in range(13)]
    combined = pescador.mux(quality_pool, n_samples=None, k=working_size,
                            lam=None, with_replacement=False)
    if pitch_shift:
        combined = FX.pitch_shift_cqt(combined)
    return FX.map_to_joint_index(combined, vocab_dim)
def class_stream(neighbors, dataset, working_size=20, lam=5, with_meta=False):
    """Yield samples from a randomly chosen neighborhood, labelled by its key.

    Parameters
    ----------
    neighbors : dict
        Map of neighborhood keys to iterables of `dataset.loc` indexes.
    dataset : object
        Indexed through `dataset.loc[idx]`; each row is handed to
        `slice_embedding`.
    working_size : int, default=20
        Passed as `k` to each per-neighborhood mux.
    lam : number, default=5
        Passed as `lam` to each per-neighborhood mux.
    with_meta : bool, default=False
        If True, yield ``(data, meta)``; otherwise yield only ``data``.

    Yields
    ------
    data : dict
        ``x_in`` is the sampled observation and ``y`` is a one-element
        array holding the neighborhood key.
    meta : object
        Second item produced by the underlying stream (only if `with_meta`).
    """
    streams = {}
    for key, indexes in neighbors.items():
        seeds = [pescador.Streamer(slice_embedding, dataset.loc[i])
                 for i in indexes]
        streams[key] = pescador.mux(seeds, n_samples=None, k=working_size,
                                    lam=lam)

    # `streams` is not modified once built, so its key list can be reused.
    all_keys = list(streams.keys())
    while True:
        chosen = random.choice(all_keys)
        x_in, meta = next(streams[chosen])
        data = dict(x_in=x_in, y=np.array([chosen]))
        if with_meta:
            yield (data, meta)
        else:
            yield data
def create_chord_index_stream(stash, win_length, lexicon,
                              index_mapper=map_chord_labels,
                              sample_func=slice_cqt_entity,
                              pitch_shift_func=FX.pitch_shift_cqt,
                              max_pitch_shift=0, working_size=50,
                              partition_labels=None, valid_idx=None):
    """Return an unconstrained stream of chord samples with class indexes.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    lexicon : lexicon.Lexicon
        Instantiated chord lexicon for mapping labels to indices.
    index_mapper : callable
        Label-to-index mapper, used for partitioning and for the final
        `FX.map_to_class_index` step.
    sample_func : callable
        Sampling function forwarded to `chord_sampler`.
    pitch_shift_func : callable
        Applied to the stream when `max_pitch_shift` is positive.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation.
    working_size : int
        Number of open streams at a time.
    partition_labels : dict
        Precomputed partition; built from the stash when None.
    valid_idx : iterable of int
        Chord indexes to sample; defaults to ``range(lexicon.num_classes)``.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, index_mapper, lexicon)
    if valid_idx is None:
        valid_idx = range(lexicon.num_classes)

    # A single index covering every valid chord class is shared by all
    # samplers.
    chord_index = util.index_partition_arrays(partition_labels, valid_idx)
    samplers = []
    for key in stash.keys():
        samplers.append(pescador.Streamer(chord_sampler, key, stash,
                                          win_length, chord_index,
                                          sample_func=sample_func))

    muxed = pescador.mux(samplers, None, working_size, lam=25)
    if max_pitch_shift > 0:
        muxed = pitch_shift_func(muxed, max_pitch_shift=max_pitch_shift)
    return FX.map_to_class_index(muxed, index_mapper, lexicon)
def _instrument_mux(self, instrument):
    """Return a pescador.mux for a single instrument.

    Parameters
    ----------
    instrument : str
        Instrument to select.

    Returns
    -------
    mux : pescador.mux or None
        A pescador.mux over the instrument's streams, or None when
        `self._instrument_streams` yields nothing for it.
    """
    streams = self._instrument_streams(instrument)
    # Guard clause: no streams means there is nothing to mux.
    if len(streams) == 0:
        return None
    return pescador.mux(streams, n_samples=None,
                        **self.instrument_mux_params)
def load_dataset(stash):
    """Collect normalized observations for each quality label 0..12.

    For every quality label, all matching entities are streamed through
    `D.chroma_stepper`, muxed without replacement, rotated to root 0 with
    `FX.rotate_chroma_to_root`, and normalized along axis 1.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `D.chroma_stepper`.

    Returns
    -------
    qual_obs : list of np.ndarray
        One normalized observation array per quality label, in label order.
    """
    partition_labels = util.partition(stash, D.quality_map)

    def observations_for(quality):
        qindex = util.index_partition_arrays(partition_labels, [quality])
        steppers = [pescador.Streamer(D.chroma_stepper, key, stash, qindex)
                    for key in qindex]
        muxed = pescador.mux(steppers, n_samples=None, k=50, lam=None,
                             with_replacement=False)
        # The rotated stream is consumed completely here.
        obs = np.array(list(FX.rotate_chroma_to_root(muxed, 0)))
        return util.normalize(obs, axis=1)

    return [observations_for(q) for q in range(13)]
def create_target_stream(stash, win_length, working_size=50,
                         max_pitch_shift=0, bins_per_pitch=1,
                         sample_func=slice_cqt_entity,
                         mapper=FX.map_to_chroma):
    """Return an unconstrained stream of chord samples, mapped to targets.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    working_size : int
        Number of open streams at a time.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation.
    bins_per_pitch : int
        Second argument passed to `mapper`.
    sample_func : callable
        Sampling function forwarded to `chord_sampler`.
    mapper : callable
        Called as ``mapper(stream, bins_per_pitch)`` to produce the result.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    samplers = []
    for key in stash.keys():
        samplers.append(pescador.Streamer(chord_sampler, key, stash,
                                          win_length,
                                          sample_func=sample_func))
    muxed = pescador.mux(samplers, None, working_size, lam=25)
    if max_pitch_shift > 0:
        muxed = FX.pitch_shift_cqt(muxed, max_pitch_shift=max_pitch_shift)
    return mapper(muxed, bins_per_pitch)
def setup(self):
    """Perform the setup to prepare for streaming.

    Builds one mux per unique instrument in `self.features_df`, combines the
    non-empty ones into a master mux, and stores the results on
    `self.master_stream` and `self.buffered_streamer`.
    """
    instrument_names = list(self.features_df["instrument"].unique())

    # `_instrument_mux` returns None for instruments without streams.
    per_instrument = [self._instrument_mux(name) for name in instrument_names]
    mux_streams = [pescador.Streamer(m) for m in per_instrument
                   if m is not None]

    # The master mux is wrapped in a Streamer so that the buffer knows what
    # to do with it.
    master_mux = pescador.mux(mux_streams, **self.master_mux_params)
    self.master_stream = pescador.Streamer(master_mux)

    make_buffer = zmq_buffered_stream if self.use_zmq else buffer_stream
    self.buffered_streamer = make_buffer(self.master_stream, self.batch_size)
def chord_streamer(stash, win_length, partition_labels=None, vocab_dim=157,
                   working_size=4, valid_idx=None, n_samples=5000,
                   batch_size=50):
    """Build one minibatch stream per chord index that has data.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `D.chord_sampler`.
    win_length : int
        Window length forwarded to `D.chord_sampler`.
    partition_labels : object, default=None
        Precomputed partition; built from `D.chord_map` when None.
    vocab_dim : int, default=157
        Forwarded to `FX.map_to_chord_index`; also the default index range.
    working_size : int, default=4
        Passed as `k` to each per-chord mux.
    valid_idx : iterable of int, default=None
        Chord indexes to open; defaults to ``range(vocab_dim)``.
    n_samples : int, default=5000
        Passed as `n_samples` to each per-chord mux.
    batch_size : int, default=50
        Passed to `S.minibatch`.

    Returns
    -------
    chord_pool : list
        One `S.minibatch` result per chord index that had entities.
    chord_idx : np.ndarray
        The chord indexes matching `chord_pool`, in the same order.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, D.chord_map)
    if valid_idx is None:
        valid_idx = range(vocab_dim)

    batches = []
    opened = []
    for idx in valid_idx:
        print("Opening %d ..." % idx)
        subindex = util.index_partition_arrays(partition_labels, [idx])
        samplers = [
            pescador.Streamer(D.chord_sampler, key, stash, win_length,
                              subindex)
            for key in subindex.keys()
        ]
        if not samplers:
            continue
        muxed = pescador.mux(
            samplers, n_samples=n_samples, k=working_size, lam=20)
        indexed = FX.map_to_chord_index(muxed, vocab_dim)
        batches.append(S.minibatch(indexed, batch_size=batch_size))
        opened.append(idx)
    print("Done!")
    return batches, np.array(opened)
def create_embedding_stream(dataset, working_size=100, lam=5, **kwargs):
    """Mux one `slice_embedding` streamer per row of `dataset`.

    Parameters
    ----------
    dataset : object
        Iterated with `dataset.iterrows()`; each row is handed to
        `slice_embedding`.
    working_size : int, default=100
        Passed as `k` to `pescador.mux`.
    lam : number, default=5
        Passed as `lam` to `pescador.mux`.
    kwargs : dict
        Extra keyword arguments forwarded to every streamer.

    Returns
    -------
    stream : object
        The result of `pescador.mux` over all row streamers.
    """
    seeds = []
    # Only the row itself is needed; the row label is discarded.
    for _, row in dataset.iterrows():
        seeds.append(pescador.Streamer(slice_embedding, row, **kwargs))
    return pescador.mux(seeds, n_samples=None, k=working_size, lam=lam)
def create_contrastive_chord_stream(stash, win_length, valid_idx=None,
                                    partition_labels=None, working_size=2,
                                    vocab_dim=157, pitch_shift=0,
                                    neg_probs=None):
    """Return a stream of chord samples, with equal positive and negative
    examples.

    For every chord index that has data, a positive stream is zipped with a
    negative stream drawn from the other chord indexes, weighted by the
    matching row of `neg_probs`.

    Parameters
    ----------
    stash : object
        Entity collection handed to `util.partition` and `chord_sampler`.
    win_length : int
        Window length forwarded to `chord_sampler`.
    valid_idx : iterable of int, default=None
        Chord indexes to open; defaults to ``range(vocab_dim)``. Indexes
        not listed are treated as having no data.
    partition_labels : object, default=None
        Precomputed partition; built from `chord_map` when None.
    working_size : int, default=2
        Passed as `k` to each per-chord mux.
    vocab_dim : int, default=157
        Size of the chord vocabulary.
    pitch_shift : int, default=0
        Accepted for interface compatibility; not used by this function.
    neg_probs : np.ndarray, default=None
        ``(vocab_dim, vocab_dim)`` matrix whose row `i` weights the negative
        classes for chord `i`. Defaults to uniform off-diagonal weights.
        The caller's array is not modified.

    Returns
    -------
    stream : object
        Whatever `FX.unpack_contrastive_pairs` returns for the pair stream.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, chord_map)

    if valid_idx is None:
        valid_idx = range(vocab_dim)

    if neg_probs is None:
        neg_probs = np.ones([vocab_dim] * 2)
        neg_probs[np.eye(vocab_dim, dtype=bool)] = 0.0
        neg_probs = util.normalize(neg_probs, axis=1)

    # One slot per vocabulary index, so lookups below stay aligned even when
    # `valid_idx` is a subset. Object arrays from np.empty start as None.
    chord_streams = np.empty(vocab_dim, dtype=object)
    has_data = np.zeros(vocab_dim, dtype=bool)
    for chord_idx in valid_idx:
        subindex = util.index_partition_arrays(partition_labels, [chord_idx])
        entity_pool = [pescador.Streamer(chord_sampler, key, stash,
                                         win_length, subindex)
                       for key in subindex.keys()]
        if not entity_pool:
            continue
        chord_streams[chord_idx] = pescador.mux(
            entity_pool, n_samples=None, k=working_size, lam=20)
        has_data[chord_idx] = True

    binary_pool = []
    for chord_idx in range(vocab_dim):
        if chord_streams[chord_idx] is None:
            continue

        # Work on a copy: the in-place edits below must not leak into the
        # caller's `neg_probs`.
        not_chord_probs = np.array(neg_probs[chord_idx], dtype=float)
        # Drop contrast classes with (a) no data or (b) no probability.
        not_chord_probs[chord_idx] = 0.0
        not_chord_probs *= has_data
        nidx = not_chord_probs > 0.0
        assert not_chord_probs.sum() > 0.0
        chord_pool = [pescador.Streamer(x) for x in chord_streams[nidx]]
        neg_stream = pescador.mux(chord_pool, n_samples=None,
                                  k=len(chord_pool), lam=None,
                                  with_replacement=False,
                                  pool_weights=not_chord_probs[nidx])
        pair_stream = itertools.izip(chord_streams[chord_idx], neg_stream)
        binary_pool.append(pescador.Streamer(pair_stream))

    cstream = pescador.mux(binary_pool, n_samples=None, k=len(binary_pool),
                           lam=None, with_replacement=False)
    return FX.unpack_contrastive_pairs(cstream, vocab_dim)
def test_restart_mux(self, mux_class):
    """Iterating the same mux twice should yield the same number of items."""
    streamers = [pescador.Streamer(chars) for chars in ('abc', 'def')]
    mux = mux_class(streamers, 2, rate=None, random_state=1234)
    first_pass = list(mux(max_iter=100))
    second_pass = list(mux(max_iter=100))
    assert len(first_pass) == len(second_pass)
def test_mux_inf_loop(self, mux_class):
    """A mux over only empty streamers should terminate and yield nothing."""
    empty_streamers = [pescador.Streamer([]), pescador.Streamer([])]
    mux = mux_class(empty_streamers, 2, rate=None, random_state=1234)
    produced = list(mux(max_iter=100))
    assert len(produced) == 0
def create_pairwise_stream(stash, win_length, working_size=100,
                           threshold=None, sample_func=slice_cqt_entity):
    """Return a stream of samples, with equal positive and negative examples.

    Keys are grouped by the text before their first underscore (the
    instrument code). Each group gets its own stream, and every emitted
    triple takes two samples from one group and one from another group.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    working_size : int
        Number of open streams at a time.
    threshold : scalar, default=None
        Threshold under which to suppress entities.
    sample_func : callable
        Sampling function to apply to each entity.

    Returns
    -------
    stream : generator
        Data stream of windowed entities.
    """
    sampler_kwargs = {"sample_func": sample_func}
    if threshold is not None:
        sampler_kwargs["threshold"] = threshold

    # Group keys by instrument code.
    partitions = {}
    for key in stash.keys():
        icode = key.split("_")[0]
        partitions.setdefault(icode, []).append(key)

    inst_streams = []
    for key_set in partitions.values():
        samplers = [pescador.Streamer(cqt_sampler, key, stash, win_length,
                                      **sampler_kwargs)
                    for key in key_set]
        inst_streams.append(
            pescador.mux(samplers, n_samples=None, k=working_size, lam=1))
    inst_streams = np.array(inst_streams)

    positions = np.arange(len(inst_streams))
    triple_pool = []
    for idx, positive in enumerate(inst_streams):
        # Negatives come from every instrument stream except this one.
        others = inst_streams[positions != idx]
        neg_pool = [pescador.Streamer(other) for other in others]
        neg_stream = pescador.mux(neg_pool, n_samples=None, k=len(neg_pool),
                                  lam=None, with_replacement=False)
        # The positive stream appears twice, so each triple consumes two
        # consecutive samples from it.
        triples = itertools.izip(positive, positive, neg_stream)
        triple_pool.append(pescador.Streamer(triples))

    cstream = pescador.mux(triple_pool, n_samples=None, k=len(triple_pool),
                           lam=None, with_replacement=False)
    return _unpack_triples(cstream)
def setup_mux(self):
    """Mux `self.seed_pool` and store the result on `self.streamer`.

    Every seed is kept active at once (``k=len(self.seed_pool)``); the
    remaining mux options come from `self.CLASS_MUX_PARAMS`.
    """
    seeds = self.seed_pool
    muxed = pescador.mux(seeds, k=len(seeds), **self.CLASS_MUX_PARAMS)
    self.streamer = pescador.Streamer(muxed)
def create_embedding_stream(dataset, working_size=100, lam=5, **kwargs):
    """Mux one `slice_embedding` streamer per row of `dataset`.

    Parameters
    ----------
    dataset : object
        Iterated with `dataset.iterrows()`; each row is handed to
        `slice_embedding`.
    working_size : int, default=100
        Passed as `k` to `pescador.mux`.
    lam : number, default=5
        Passed as `lam` to `pescador.mux`.
    kwargs : dict
        Extra keyword arguments forwarded to every streamer.

    Returns
    -------
    stream : object
        The result of `pescador.mux` over all row streamers.
    """
    seeds = []
    # Only the row itself is needed; the row label is discarded.
    for _, row in dataset.iterrows():
        seeds.append(pescador.Streamer(slice_embedding, row, **kwargs))
    return pescador.mux(seeds, n_samples=None, k=working_size, lam=lam)
def create_uniform_chroma_stream(stash, win_length, lexicon, working_size=5,
                                 bins_per_pitch=1, max_pitch_shift=0,
                                 partition_labels=None, valid_idx=None):
    """Return a chord-uniform stream of samples mapped to chroma.

    Parameters
    ----------
    stash : biggie.Stash
        A collection of chord entities.
    win_length : int
        Length of a given tile slice.
    lexicon : lexicon.Lexicon
        Instantiated chord lexicon for mapping labels to indices.
    working_size : int
        Number of open streams at a time, per chord index.
    bins_per_pitch : int
        Forwarded to `FX.map_to_chroma`.
    max_pitch_shift : int
        Maximum number of semitones (+/-) to rotate an observation.
    partition_labels : dict
        Precomputed partition; built from the stash when None.
    valid_idx : iterable of int
        Chord indexes to open; defaults to ``range(lexicon.num_classes)``.

    Returns
    -------
    stream : generator
        Data stream of windowed chord entities.
    """
    if partition_labels is None:
        partition_labels = util.partition(stash, map_chord_labels, lexicon)
    if valid_idx is None:
        valid_idx = range(lexicon.num_classes)

    chord_pool = []
    for chord_idx in valid_idx:
        subindex = util.index_partition_arrays(partition_labels, [chord_idx])
        samplers = [
            pescador.Streamer(chord_sampler, key, stash, win_length, subindex,
                              sample_func=slice_cqt_entity)
            for key in subindex.keys()
        ]
        # Chord indexes without any entities contribute nothing.
        if not samplers:
            continue
        per_chord = pescador.mux(samplers, n_samples=None, k=working_size,
                                 lam=20)
        chord_pool.append(pescador.Streamer(per_chord))

    combined = pescador.mux(chord_pool, n_samples=None, k=lexicon.vocab_dim,
                            lam=None, with_replacement=False)
    if max_pitch_shift > 0:
        combined = FX.pitch_shift_cqt(combined,
                                      max_pitch_shift=max_pitch_shift)
    return FX.map_to_chroma(combined, bins_per_pitch)