def main():
    """
    Parse a music mxl file, build an oracle over its notes and show a
    reshuffled version of the piece.

    music21 and musescore/finale are required: both the parsed score and the
    generated stream are opened with ``show()`` in whichever viewer your
    music21 installation uses.

    When the script is run without command-line arguments the defaults are
    seq_len = number of notes in the source, p = 0.0, k = 0, LRS = 2 and
    weight = 'weight'. Otherwise five positional arguments are read from
    ``sys.argv`` in the order listed below.

    OPTIONAL ARGS:
        seq_len: an integer for the length of the output sequence
            (0 means "as long as the source note sequence").
        p: a float of the probability using the forward links.
        k: an integer for the starting state.
        LRS: an integer for the lower limit of the LRS of sfx/rsfx allowed
            to jump to.
        weight:
            None: choose uniformly among all the possible sfx/rsfx given
                current state.
            "max": always choose the sfx/rsfx having the longest LRS.
            "weight": choose sfx/rsfx in a way that favors longer ones than
                shorter ones.
    """
    filename = os.path.abspath('') + '/../files/Suite_No_1_for_Cello_M1_Prelude.mxl'
    s = music21.converter.parse(filename)
    cello_part = s.getElementById('Violoncello')

    # Exact type match: only plain Note objects are kept, subclasses and
    # other element kinds are dropped.
    note_obj_seq = [el for el in cello_part.flat.notes
                    if type(el) is music21.note.Note]

    bo = vmo.build_oracle(note_obj_seq, 'f')
    bo.name = 'bach_cello_example'

    # Resolve the generation parameters first, then call the generator once.
    if len(sys.argv) == 1:
        seq_len, p, k, LRS, weight = len(note_obj_seq), 0.0, 0, 2, 'weight'
    else:
        seq_len = int(sys.argv[1]) or len(note_obj_seq)
        p = float(sys.argv[2])
        k = int(sys.argv[3])
        LRS = int(sys.argv[4])
        weight = sys.argv[5]
    b, _kend, _ktrace = gen.generate(bo, seq_len, p, k, LRS=LRS, weight=weight)

    # Turn the generated state sequence back into notes.
    stream1 = music21.stream.Stream()
    picked = [bo.data[state] for state in b]
    for entry in picked:
        source_note = entry[0]
        new_note = music21.note.Note(source_note.nameWithOctave)
        new_note.duration.type = source_note.duration.type
        new_note.duration = source_note.duration
        stream1.append(new_note)

    s.show()
    stream1.show()
def processFlat(self):
    """Main process.

    Returns
    -------
    est_idxs : np.array(N)
        Estimated indices of the segment boundaries, in frame indices.
    est_labels : np.array(N-1)
        Estimated labels for the segments.
    """
    # Features come back as array(n_frames, n_features); normalize them
    # along axis 0 and stack memory on the transposed matrix.
    raw_features = self._preprocess()
    normalized = librosa.util.normalize(raw_features, axis=0)
    features = librosa.feature.stack_memory(normalized.T).T
    n_dims = features.shape[1]

    # Entry [0][1] of the threshold search result is used as the oracle
    # threshold.
    threshold_search = vmo.find_threshold(features, dim=n_dims)
    oracle = vmo.build_oracle(features,
                              flag='a',
                              threshold=threshold_search[0][1],
                              dim=n_dims)

    bounds, labels = segmentation(oracle,
                                  method=self.config['method'],
                                  connectivity=self.config['connectivity'])

    # Post process estimations (the last label is dropped before doing so)
    est_idxs, est_labels = self._postprocess(bounds, labels[:-1])

    # Boundaries must span the whole feature sequence.
    assert est_idxs[0] == 0 and est_idxs[-1] == features.shape[0] - 1

    return est_idxs, est_labels
def vmo_routine(feature):
    """Build a VMO oracle for ``feature`` using a searched threshold.

    ``vmo.find_threshold`` is run on ``feature`` and element ``[0][1]`` of
    its result is passed as ``threshold`` to ``vmo.build_oracle``. Both calls
    receive ``feature.shape[1]`` as ``dim``.

    Args:
        feature: 2-D array-like exposing ``.shape``
            (presumably frames x dimensions; confirm with callers).

    Returns:
        The oracle returned by ``vmo.build_oracle`` (built with flag 'a').
    """
    n_dims = feature.shape[1]
    threshold_search = vmo.find_threshold(feature, dim=n_dims)
    best_threshold = threshold_search[0][1]
    return vmo.build_oracle(feature, flag='a', threshold=best_threshold,
                            dim=n_dims)
def main():
    """
    Parse a music mxl file, build an oracle over its notes and show a
    reshuffled version of the piece.

    music21 and musescore/finale are required: both the parsed score and the
    generated stream are opened with ``show()`` in whichever viewer your
    music21 installation uses.

    When the script is run without command-line arguments the defaults are
    seq_len = number of notes in the source, p = 0.0, k = 0, LRS = 2 and
    weight = 'weight'. Otherwise five positional arguments are read from
    ``sys.argv`` in the order listed below.

    OPTIONAL ARGS:
        seq_len: an integer for the length of the output sequence
            (0 means "as long as the source note sequence").
        p: a float of the probability using the forward links.
        k: an integer for the starting state.
        LRS: an integer for the lower limit of the LRS of sfx/rsfx allowed
            to jump to.
        weight:
            None: choose uniformly among all the possible sfx/rsfx given
                current state.
            "max": always choose the sfx/rsfx having the longest LRS.
            "weight": choose sfx/rsfx in a way that favors longer ones than
                shorter ones.
    """
    filename = os.path.abspath('') + '/../files/Suite_No_1_for_Cello_M1_Prelude.mxl'
    s = music21.converter.parse(filename)
    cello_part = s.getElementById('Violoncello')

    # Exact type match: only plain Note objects are kept, subclasses and
    # other element kinds are dropped.
    note_obj_seq = [el for el in cello_part.flat.notes
                    if type(el) is music21.note.Note]

    bo = vmo.build_oracle(note_obj_seq, 'f')
    bo.name = 'bach_cello_example'

    # Resolve the generation parameters first, then call the generator once.
    if len(sys.argv) == 1:
        seq_len, p, k, LRS, weight = len(note_obj_seq), 0.0, 0, 2, 'weight'
    else:
        seq_len = int(sys.argv[1]) or len(note_obj_seq)
        p = float(sys.argv[2])
        k = int(sys.argv[3])
        LRS = int(sys.argv[4])
        weight = sys.argv[5]
    b, _kend, _ktrace = gen.generate(bo, seq_len, p, k, LRS=LRS, weight=weight)

    # Turn the generated state sequence back into notes.
    stream1 = music21.stream.Stream()
    picked = [bo.data[state] for state in b]
    for entry in picked:
        source_note = entry[0]
        new_note = music21.note.Note(source_note.nameWithOctave)
        new_note.duration.type = source_note.duration.type
        new_note.duration = source_note.duration
        stream1.append(new_note)

    s.show()
    stream1.show()
def _self_sim_vmo(feature, d, m='lrs'):
    """Compute a self-similarity matrix from a VMO oracle built on ``feature``.

    A threshold search range is chosen from the distance metric ``d``,
    ``vmo.find_threshold`` is run over that range, and element ``[0][1]`` of
    its result is used as the threshold for ``vmo.build_oracle``. The
    resulting oracle is handed to ``vse.create_selfsim``.

    Args:
        feature: 2-D array-like exposing ``.shape``; ``feature.shape[1]`` is
            passed as ``dim`` (presumably frames x dimensions; confirm with
            callers).
        d: name of the distance function, forwarded as ``dfunc``. The
            metrics 'cosine', 'correlation', 'euclidean' and 'sqeuclidean'
            have dedicated search ranges; any other value uses the
            cosine/correlation range.
        m: method name forwarded to ``vse.create_selfsim`` (default 'lrs').

    Returns:
        Whatever ``vse.create_selfsim(oracle, method=m)`` returns.
    """
    # Threshold search range per metric, passed to vmo.find_threshold as `r`
    # (presumably (start, stop, step); confirm against vmo).
    default_range = (0., 1., 0.02)
    search_ranges = {
        'cosine': default_range,
        'correlation': default_range,
        'euclidean': (0., 30., .5),
        'sqeuclidean': (0., 800., 16.),
    }
    # The previous test `d == 'cosine' or 'correlation'` was always true, so
    # every metric silently got the cosine range. Unknown metrics keep that
    # range so existing callers continue to work.
    r = search_ranges.get(d, default_range)

    n_dims = feature.shape[1]
    ideal_t = vmo.find_threshold(feature, r=r, flag='a', dfunc=d, dim=n_dims)
    oracle = vmo.build_oracle(feature, flag='a', threshold=ideal_t[0][1],
                              dfunc=d, dim=n_dims)
    return vse.create_selfsim(oracle, method=m)
def generate(self, audio_path, sr=44100, hop_length=512):
    """Segmentation of an audio recording using the Constant Q Transform (CQT)

    The CQT of the recording is aggregated over a fixed synthetic beat grid,
    stacked with memory, and used to build a VMO oracle, which is then
    segmented with the 'symbol_spectral' method. Nothing is returned; the
    results are stored on the instance:

    * ``self._oracle``: the oracle built from the stacked CQT features.
    * ``self._segmentation``: a list of ``Section(label, Region(start, end))``
      objects, one per pair of consecutive estimated boundaries, where
      ``label`` is the index of the segment's label among the unique labels.

    Args:
        audio_path (str): A string representing the path of the audio file
        sr (int): Sampling rate (default: 44100)
        hop_length (int): Number of samples between successive CQT columns
            (default: 512)
    """
    # Extract feature
    y, sr = librosa.load(audio_path, sr=sr)
    cqt = librosa.core.cqt(y, sr=sr, hop_length=hop_length)

    # Synthetic beat grid: one boundary every 30 CQT frames. The grid indexes
    # columns of `cqt`, so it is bounded by the number of CQT frames rather
    # than by the number of audio samples.
    beat = list(range(1, cqt.shape[1], 30))
    cqt_sync = librosa.feature.sync(cqt, beat, aggregate=np.median)
    cqt_stack = librosa.feature.stack_memory(cqt_sync, n_steps=3)

    # Find ideal threshold
    features = cqt_stack.T
    n_dims = cqt_stack.shape[0]
    r = (0, 1.1, 0.1)
    threshold = vmo.find_threshold(features, r=r, dim=n_dims)
    ideal_t = threshold[0][1]

    # Build Oracle
    cqt_vmo = vmo.build_oracle(features, flag='a', threshold=ideal_t,
                               dim=n_dims)

    # Symbol spectral
    method = 'symbol_spectral'
    est_boundaries, est_labels = van.segmentation(cqt_vmo, method=method,
                                                  connectivity='lrs')

    # Only the inverse index is needed: it maps each segment to the position
    # of its label in the sorted array of unique labels.
    _, invind = np.unique(est_labels, return_inverse=True)

    # Setting attributes
    self._oracle = cqt_vmo
    self._segmentation = [
        Section(label, Region(start, end))
        for label, start, end in zip(invind, est_boundaries[:-1],
                                     est_boundaries[1:])
    ]
# NOTE(review): this fragment appears to begin inside an enclosing loop that is
# not visible here: `ind`, `audio_test`, `ground`, `len_list`, `min_len_list`
# and `subbeat_mat` are all defined elsewhere. Confirm the nesting against
# the full file.

# For every occurrence (_p) of every ground-truth pattern (p): shift its start
# and end by audio_test['info'][2][0], record its length, and mark the
# inclusive span start..end in row i of `ground` with the pattern id i + 1.
for i, p in enumerate(audio_test['pattern']):
    for _p in p:
        start = _p[0] - audio_test['info'][2][0]
        end = _p[1] - audio_test['info'][2][0]
        len_list.append(end - start)
        ground[i][start:end + 1] = i + 1
# Shortest occurrence length, rescaled by len(subbeat_mat[ind]) over the
# extent audio_test['info'][2][1] - audio_test['info'][2][0], plus one.
min_len = int(
    min(len_list) * len(subbeat_mat[ind]) /
    (audio_test['info'][2][1] - audio_test['info'][2][0])) + 1
min_len_list.append(min_len)
print min_len_list

# Time oracle construction and repeated-pattern search over five feature
# matrices; `pattern` is overwritten on each iteration.
start_time = time.time()
for ind in range(5):
    chroma_frames = feature_mat[ind].transpose()
    r = (0.0, 1.0, 0.01)
    # Entry [0][1] of the threshold search result is used as the threshold.
    ideal_v_inv = vmo.find_threshold(chroma_frames, r=r, flag='a',
                                     dfunc='other',
                                     dfunc_handle=trnspose_inv,
                                     VERBOSE=False)
    oracle_inv = vmo.build_oracle(chroma_frames, flag='a',
                                  threshold=ideal_v_inv[0][1],
                                  feature='chroma', dfunc='other',
                                  dfunc_handle=trnspose_inv)
    pattern = van.find_repeated_patterns(oracle_inv, lower=5)
# Elapsed wall-clock time in seconds.
print str(time.time() - start_time)
# NOTE(review): script fragment. `subbeats`, `beats`, `C`, `r`, `pre`,
# `trnspose_inv` and `audio_test` are defined outside the visible code.

# Append two integer frame positions spaced evenly from beats[-1] towards
# C.shape[1] (the end point itself is excluded by endpoint=False).
subbeats.extend(np.linspace(beats[-1], C.shape[1], num=2, endpoint=False).astype("int").tolist())
# Median-aggregate the columns of C between consecutive sub-beat positions.
C_sync = librosa.feature.sync(C, subbeats, aggregate=np.median)
# C.shape[1] is appended only after syncing, so it is not used by the sync call above.
subbeats.append(C.shape[1])
# Machine epsilon is added before the log so that zero entries do not produce -inf.
feature = np.log(C_sync + np.finfo(float).eps)
feature = pre.normalize(feature, axis=0)

### Create VMO
chroma_frames = feature.transpose()
# Threshold search with a custom distance function (dfunc="other" with
# dfunc_handle=trnspose_inv); entry [0][1] of the result is used as the
# oracle threshold below.
ideal_v_inv = vmo.find_threshold(
    chroma_frames, r=r, flag="a", dfunc="other", dfunc_handle=trnspose_inv, dim=chroma_frames.shape[1]
)
oracle_inv = vmo.build_oracle(
    chroma_frames,
    flag="a",
    threshold=ideal_v_inv[0][1],
    feature="chroma",
    dfunc="other",
    dfunc_handle=trnspose_inv,
    dim=chroma_frames.shape[1],
)

### Gather Ground Truth from Dataset
# One row per ground-truth pattern; the number of columns is the extent
# audio_test["info"][2][1] - audio_test["info"][2][0].
ground = np.zeros((len(audio_test["pattern"]), audio_test["info"][2][1] - audio_test["info"][2][0]))
len_list = []
for i, p in enumerate(audio_test["pattern"]):
    for _p in p:
        # Shift each occurrence so that positions are relative to audio_test["info"][2][0].
        start = _p[0] - audio_test["info"][2][0]
        end = _p[1] - audio_test["info"][2][0]
        len_list.append(end - start)
        # Mark the inclusive span start..end with the 1-based pattern id.
        ground[i][start : end + 1] = i + 1
# Plot y_a against x_a (labelled IR and Threshold respectively).
plt.figure()
plt.plot(x_a, y_a, linewidth=2)
plt.title('IR vs. Threshold Value', fontsize=18)
# Pass the flag positionally as a real boolean: the keyword `b` was renamed
# in newer Matplotlib releases and the string 'on' is no longer accepted.
plt.grid(True)
plt.xlabel('Threshold', fontsize=14)
plt.ylabel('IR', fontsize=14)

# <markdowncell>

# Build the best oracle by choosing the ideal threshold (the one that gives the most informative oracle).

# <codecell>

# Entry [0][1] of `ideal_t` is used as the oracle threshold.
best_oracle = vmo.build_oracle(features, flag='a', threshold=ideal_t[0][1], feature='chroma', dfunc='euclidean', dim=features.shape[1])
#best_oracle = vmo.build_oracle(stock_data, flag='a', threshold=ideal_t[0][1], dim=stock_data.shape[1])

# <markdowncell>

# # Self-similarity matrix using reverse suffix links

# <codecell>

rs = van.create_selfsim(best_oracle, method='rsfx')
# Single-argument parenthesized form prints identically on Python 2 and also
# parses on Python 3.
print(rs)
fig = plt.figure()
#ax = fig.add_subplot(111)
# Step 1: for each of the five test pieces, build the ground-truth occupancy
# matrix and record the shortest pattern occurrence, rescaled by
# len(subbeat_mat[ind]) over the annotated extent of the piece.
# `audio_list`, `subbeat_mat`, `feature_mat` and `trnspose_inv` are defined
# elsewhere in the file.
min_len_list = []
for ind in range(5):
    audio_test = audio_list[ind]
    # One row per ground-truth pattern; the number of columns is the extent
    # audio_test['info'][2][1] - audio_test['info'][2][0].
    ground = np.zeros((len(audio_test['pattern']),
                       audio_test['info'][2][1] - audio_test['info'][2][0]))
    len_list = []
    for i, p in enumerate(audio_test['pattern']):
        for _p in p:
            # Positions are made relative to audio_test['info'][2][0].
            start = _p[0] - audio_test['info'][2][0]
            end = _p[1] - audio_test['info'][2][0]
            len_list.append(end - start)
            # Mark the inclusive span start..end with the 1-based pattern id.
            ground[i][start:end + 1] = i + 1
    min_len = int(min(len_list) * len(subbeat_mat[ind]) /
                  (audio_test['info'][2][1] - audio_test['info'][2][0])) + 1
    min_len_list.append(min_len)
# Single-argument parenthesized print: same output on Python 2, and valid
# syntax on Python 3 (the bare print statement is not).
print(min_len_list)

# Step 2: time oracle construction and repeated-pattern search over the five
# feature matrices. `pattern` is overwritten on each iteration, so only the
# last result remains afterwards.
start_time = time.time()
# The threshold search range does not change between iterations.
r = (0.0, 1.0, 0.01)
for ind in range(5):
    chroma_frames = feature_mat[ind].transpose()
    # Entry [0][1] of the threshold search result is used as the threshold.
    ideal_v_inv = vmo.find_threshold(chroma_frames, r=r, flag='a',
                                     dfunc='other',
                                     dfunc_handle=trnspose_inv,
                                     VERBOSE=False)
    oracle_inv = vmo.build_oracle(chroma_frames, flag='a',
                                  threshold=ideal_v_inv[0][1],
                                  feature='chroma', dfunc='other',
                                  dfunc_handle=trnspose_inv)
    pattern = van.find_repeated_patterns(oracle_inv, lower=5)
# Elapsed wall-clock time in seconds.
print(str(time.time() - start_time))