def unpack_contrastive_pairs(stream, vocab_dim, min_val=0.0, max_val=1.0,
                             rotate_prob=0.0):
    """Unpack (positive, negative) entity pairs for contrastive training.

    Parameters
    ----------
    stream : generator
        Yields (pos_entity, neg_entity) pairs, or None.
    vocab_dim : int
        Size of the chord vocabulary.
    min_val, max_val : scalars
        Target values assigned to negative / positive observations.
    rotate_prob : scalar, in [0, 1]
        Probability of transposing the negative example to the positive
        example's root.
    """
    for pair in stream:
        if pair is None:
            yield pair
            continue
        pos_entity, neg_entity = pair
        pos_chord_label = str(pos_entity.chord_label)
        neg_chord_label = str(neg_entity.chord_label)
        pos_chord_idx = labels.chord_label_to_class_index(
            pos_chord_label, vocab_dim)
        neg_chord_idx = labels.chord_label_to_class_index(
            neg_chord_label, vocab_dim)
        if np.random.binomial(1, rotate_prob):
            shift = (pos_chord_idx - neg_chord_idx) % 12
            neg_entity = _padshift(neg_entity, shift, 3)
        yield biggie.Entity(cqt=pos_entity.cqt, chord_idx=pos_chord_idx,
                            target=np.array([max_val]))
        # Note: the negative observation is scored against the positive
        # chord index.
        yield biggie.Entity(cqt=neg_entity.cqt, chord_idx=pos_chord_idx,
                            target=np.array([min_val]))
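
# Hedged usage sketch: any generator of (positive, negative) biggie.Entity
# pairs carrying `cqt` and `chord_label` fields should work; `pair_stream`
# is an assumption for illustration.
def demo_contrastive_unpacking(pair_stream):
    for obs in unpack_contrastive_pairs(pair_stream, vocab_dim=157,
                                        rotate_prob=0.5):
        if obs is None:
            continue
        # Positives carry target=[1.0], negatives target=[0.0]; both are
        # keyed on the positive chord index.
        print obs.chord_idx, obs.target
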
def chroma_trigrams(ref_set):
    """Accumulate root-relative chroma trigram states over a reference set.

    Each state key is a tuple of three chroma bitstrings (previous, current,
    next); each value tracks total duration and the set of observed labels.
    """
    states = dict()
    for v in ref_set.values():
        labels = v['labels']
        y = L.chord_label_to_class_index(labels, 157)
        intervals = np.array(v['intervals'])
        durations = np.abs(np.diff(intervals, axis=1)).flatten()
        for n in range(1, len(y) - 1):
            sidx = [L.relative_chord_index(y[n], y[n + i], 157)
                    for i in range(-1, 2)]
            if None in sidx:
                continue
            rot_labels = [L.index_to_chord_label(s, 157) for s in sidx]
            c = tuple(["".join(["%d" % _ for _ in
                                L.chord_label_to_chroma(l).flatten()])
                       for l in rot_labels])
            if c not in states:
                states[c] = dict(labels=set(), duration=0.0)
            states[c]['duration'] += durations[n]
            states[c]['labels'].add(labels[n])
    return states
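
# Hedged illustration of the state keys built above, assuming binary chroma
# output from L.chord_label_to_chroma: a C:maj triad (pitch classes C, E, G)
# flattens to the 12-character bitstring below.
#
#   "".join(["%d" % _ for _ in L.chord_label_to_chroma('C:maj').flatten()])
#   -> '100010010000'
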
def collision_histogram(stash, bins=100, val_range=(0, 1)):
    """Histogram the strongest off-target likelihood for each quality."""
    likelihoods = dict()
    for idx, key in enumerate(stash.keys()):
        print "[%s] %12d / %12d: %s" % (time.asctime(), idx,
                                        len(stash), key)
        entity = stash.get(key)
        posterior = np.asarray(entity.posterior)
        vocab_dim = posterior.shape[1]
        class_idx = L.chord_label_to_class_index(
            entity.chord_labels, vocab_dim)
        quality_idx = D.quality_map(entity, vocab_dim)
        for pdf, cidx, qidx in zip(posterior, class_idx, quality_idx):
            if None in [qidx, cidx]:
                continue
            if qidx not in likelihoods:
                likelihoods[qidx] = list()
            # Mask the true class so the max picks out the strongest
            # colliding (incorrect) class.
            pdf[cidx] = -1
            likelihoods[qidx].append(pdf.max())
    for idx in likelihoods:
        likelihoods[idx] = np.histogram(
            likelihoods[idx], bins=bins, range=val_range)
    # Stack per-quality counts; named to avoid shadowing the `bins` argument.
    counts = np.array([likelihoods[n][0] for n in range(14)])
    return counts, likelihoods[0][1]
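
# Hedged sketch: plot the per-quality collision histograms computed above.
# matplotlib is an assumption here; collision_histogram itself only counts.
def draw_collision_histograms(counts, bin_edges):
    import matplotlib.pyplot as plt
    # `counts` has one row per quality index; `bin_edges` has one more
    # entry than each row has bins.
    centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])
    for qidx, row in enumerate(counts):
        plt.plot(centers, row, label="quality %d" % qidx)
    plt.xlabel("strongest colliding likelihood")
    plt.ylabel("count")
    plt.legend()
    plt.show()
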
def rotate_chord_to_root(stream, target_root):
    """Apply a circular shift to the CQT, rotating the chord to the
    target root."""
    for entity in stream:
        if entity is None:
            yield entity
            continue
        chord_label = str(entity.chord_label)
        chord_idx = labels.chord_label_to_class_index(chord_label, 157)
        shift = target_root - (chord_idx % 12)
        yield _padshift(entity, shift, 3)
def rotate_chroma_to_root(stream, target_root):
    """Apply a circular shift to the chroma, rotating the chord to the
    target root."""
    for entity in stream:
        if entity is None:
            yield entity
            continue
        chroma = entity.chroma.reshape(1, 12)
        chord_label = str(entity.chord_label)
        chord_idx = labels.chord_label_to_class_index(chord_label, 157)
        shift = target_root - (chord_idx % 12)
        yield util.circshift(chroma, 0, shift).flatten()
def likelihood_threshold(entity, thresholds, label_map=D.quality_map):
    """Null out quality indexes whose true-class posterior falls below the
    per-quality threshold."""
    quality_idx = label_map(entity)
    posterior = np.asarray(entity.posterior)
    vocab_dim = posterior.shape[1]
    class_idx = L.chord_label_to_class_index(
        entity.chord_labels, vocab_dim)
    for idx in range(len(quality_idx)):
        cidx, qidx = class_idx[idx], quality_idx[idx]
        if qidx is None:
            continue
        if thresholds[qidx] > posterior[idx, cidx]:
            quality_idx[idx] = None
    return quality_idx
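
# Hedged usage sketch: apply a flat threshold to every quality. The count
# of 14 qualities is an assumption carried over from collision_histogram.
def apply_uniform_threshold(entity, value=0.1, num_qualities=14):
    return likelihood_threshold(entity, thresholds=[value] * num_qualities)
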
def predict_all(stash, pca, ann):
    """Predict chord classes for every entity in the stash, logging
    framewise accuracy along the way."""
    predictions = dict()
    for n, key in enumerate(stash.keys()):
        x = stash.get(key)
        y_true = np.array(
            L.chord_label_to_class_index(x.chord_labels, 157))
        y_pred = predict(x.cqt, pca, ann)
        predictions[key] = y_pred.tolist()
        valid_idx = np.not_equal(y_true, None)
        if valid_idx.sum() > 0:
            score = np.equal(y_true[valid_idx], y_pred[valid_idx]).mean()
        else:
            score = 0
        print "[%s] %4d: %s (%0.4f)" % (time.asctime(), n, key, score)
    return predictions
def count_bigrams(reference_set, vocab_dim=157):
    """Accumulate duration-weighted counts of root-relative chord bigrams,
    returned in descending order of total duration."""
    states = dict()
    for labeled_intervals in reference_set.values():
        chord_idx = L.chord_label_to_class_index(
            labeled_intervals['labels'], vocab_dim)
        intervals = np.array(labeled_intervals['intervals'])
        durations = np.abs(np.diff(intervals, axis=1)).flatten()
        for n in range(1, len(chord_idx)):
            s = tuple([L.relative_chord_index(chord_idx[n],
                                              chord_idx[n + i], vocab_dim)
                       for i in range(-1, 1)])
            if s not in states:
                states[s] = 0
            states[s] += durations[n]
    labels = states.keys()
    counts = [states[y] for y in labels]
    idx = np.argsort(counts)[::-1]
    return [labels[i] for i in idx], [counts[i] for i in idx]
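
# Hedged usage sketch for the counter above: print the ten bigrams covering
# the most total time.
def print_top_bigrams(reference_set, k=10):
    bigrams, durations = count_bigrams(reference_set)
    for state, duration in zip(bigrams[:k], durations[:k]):
        print "%s: %0.2f" % (str(state), duration)
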
def align_estimation_to_reference(est_file, ref_file, num_classes=157):
    """Align an estimated annotation to its reference, accumulating
    duration-weighted class counts for each reference label.

    Parameters
    ----------
    est_file : str
        Path to the estimated labeled-interval file.
    ref_file : str
        Path to the reference labeled-interval file.
    num_classes : int
        Size of the chord vocabulary.

    Returns
    -------
    predictions : dict
        Reference chord labels mapped to dense count vectors.
    """
    predictions = dict()
    est_intervals, est_labels = mir_eval.io.load_labeled_intervals(est_file)
    ref_intervals, ref_labels = mir_eval.io.load_labeled_intervals(ref_file)
    t_min = ref_intervals.min()
    t_max = ref_intervals.max()
    ref_intervals, ref_labels = mir_eval.util.adjust_intervals(
        ref_intervals, ref_labels, t_min, t_max, L.NO_CHORD, L.NO_CHORD)
    est_intervals, est_labels = mir_eval.util.adjust_intervals(
        est_intervals, est_labels, t_min, t_max, L.NO_CHORD, L.NO_CHORD)
    # Merge the time-intervals of both annotations.
    intervals, ref_labels, est_labels = mir_eval.util.merge_labeled_intervals(
        ref_intervals, ref_labels, est_intervals, est_labels)
    indexes = [L.chord_label_to_class_index(l, num_classes)
               for l in est_labels]
    for interval, label, idx in zip(intervals, ref_labels, indexes):
        if idx is None:
            raise ValueError(
                "Received an erroneous estimated label: \n"
                "\tfile: %s\ttime: %s" % (est_file, interval.tolist()))
        if label not in predictions:
            predictions[label] = np.zeros(num_classes, dtype=np.int).tolist()
        predictions[label][idx] += float(np.abs(np.diff(interval)))
    return predictions
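
# Hedged usage sketch: for each reference label, report the estimated class
# index accounting for the most time. The file paths are assumptions.
def most_common_estimates(est_file, ref_file, num_classes=157):
    predictions = align_estimation_to_reference(est_file, ref_file,
                                                num_classes)
    for label, counts in predictions.items():
        print "%s -> class %d" % (label, int(np.argmax(counts)))
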
def map_to_joint_index(stream, vocab_dim):
    """Split each entity's chord class index into joint (root, quality)
    indexes.

    vocab_dim : int
        Size of the chord vocabulary.
    """
    for entity in stream:
        if entity is None:
            yield entity
            continue
        values = entity.values()
        cqt, chord_label = values.pop('cqt'), str(values.pop('chord_label'))
        chord_idx = labels.chord_label_to_class_index(chord_label, vocab_dim)
        if chord_idx is None:
            yield None
            continue
        if chord_idx == vocab_dim - 1:
            # The final class is no-chord; give it a sentinel root index.
            root_idx = 13
        else:
            root_idx = chord_idx % 12
        # Floor division keeps the behavior consistent across Python 2 / 3.
        quality_idx = int(chord_idx) // 12
        yield biggie.Entity(cqt=cqt, root_idx=root_idx,
                            quality_idx=quality_idx)
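
# Worked example of the factorization above, assuming the 157-class layout
# of 13 qualities x 12 roots plus a final no-chord class.
def demo_joint_index_math():
    for chord_idx in (0, 38, 155):
        # e.g. 38 -> root 2, quality 3; 155 -> root 11, quality 12.
        print chord_idx, chord_idx % 12, int(chord_idx) // 12
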
def draw_posterior_lazy(entity, vocab_dim=157, **viterbi_args):
    """Viterbi-decode an entity's posterior and draw it against the
    reference chord indexes."""
    chord_idx = L.chord_label_to_class_index(entity.chord_labels, vocab_dim)
    pred_idx = util.viterbi(entity.posterior, **viterbi_args)
    draw_posterior(entity.posterior, chord_idx, pred_idx)