def costalignment(a):
    """Plot the cost map of alignment ``a`` with the alignment path(s) on top.

    The current matplotlib figure is cleared, ``a.costs`` (transposed) is
    drawn with ``pcolormesh`` using the gray colormap, and ``a.path`` is
    plotted over it.  If ``data.get_truth`` yields a truthy result for
    ``a.name``, its path is plotted as well and a frameless legend naming
    both curves is added.  The figure is finally passed to ``processplt``
    with the label ``"costmap"``.

    Args:
        a: Alignment object; this function uses ``state_count()``,
            ``frame_count()``, ``costs``, ``path`` and ``name``.
    """
    plt.clf()
    plt.gray()
    fixcosts(a)

    states = np.arange(0, a.state_count() + 1)
    # Frame indices are created as floats so that the division below is never
    # an integer floor division (Python 2 semantics with an int sample rate),
    # which would collapse the time axis to zeros.
    # NOTE(review): presumably audio.nsamples is the number of samples per
    # frame, making ``times`` seconds -- confirm against the audio module.
    frames = np.arange(0, a.frame_count(), dtype=float)
    times = frames / audio.sample_rate * audio.nsamples

    x, y = np.meshgrid(times, states)
    plt.pcolormesh(x, y, a.costs.transpose())
    # Every path is shifted up by one state before plotting.
    test_plot, = plt.plot(times, [s + 1 for s in a.path])

    wav = data.get_audio(a.name)
    score = data.get_score(a.name, wav.total_length)
    truth = data.get_truth(a.name, score, wav)
    if truth:
        truth_path = truth.get_path()
        truth_plot, = plt.plot(times, [s + 1 for s in truth_path])
        # The legend is only drawn when there are two curves to tell apart.
        bx = plt.legend(
            [test_plot, truth_plot],
            ("Automatic alignment", "Ground truth"),
            numpoints=1,
            handletextpad=0.5,
            loc="upper left",
        )
        bx.draw_frame(False)

    plt.ylabel('State number in score sequence')
    plt.xlabel('Time in audio recording (s)')
    processplt(a, plt, "costmap")
def costalignment(a):
    """Render the cost matrix of ``a`` and overlay the alignment path(s).

    Steps performed, in order: clear the current figure, select the gray
    colormap, call ``fixcosts(a)``, draw ``a.costs.transpose()`` with
    ``pcolormesh`` over a (time, state) grid, and plot ``a.path``.  When
    ``data.get_truth`` returns a truthy object for ``a.name`` its path is
    plotted too and a frameless legend is added.  The result is handed to
    ``processplt(a, plt, "costmap")``.

    Args:
        a: Alignment object exposing ``state_count()``, ``frame_count()``,
            ``costs``, ``path`` and ``name``.
    """
    plt.clf()
    plt.gray()
    fixcosts(a)

    states = np.arange(0, a.state_count() + 1)
    # Use a float index array: with an integer array and an integer
    # ``audio.sample_rate`` the ``/`` below would floor-divide under
    # Python 2 rules and flatten the whole time axis to zero.
    # NOTE(review): the unit of ``times`` is seconds only if audio.nsamples
    # is the per-frame sample count -- confirm.
    frame_index = np.arange(0, a.frame_count(), dtype=float)
    times = frame_index / audio.sample_rate * audio.nsamples

    x, y = np.meshgrid(times, states)
    plt.pcolormesh(x, y, a.costs.transpose())
    # Paths are plotted one state higher than their stored values.
    test_plot, = plt.plot(times, [s + 1 for s in a.path])

    wav = data.get_audio(a.name)
    score = data.get_score(a.name, wav.total_length)
    truth = data.get_truth(a.name, score, wav)
    if truth:
        truth_path = truth.get_path()
        truth_plot, = plt.plot(times, [s + 1 for s in truth_path])
        # Legend requires both handles, so it lives inside this branch.
        bx = plt.legend(
            [test_plot, truth_plot],
            ("Automatic alignment", "Ground truth"),
            numpoints=1,
            handletextpad=0.5,
            loc="upper left",
        )
        bx.draw_frame(False)

    plt.ylabel("State number in score sequence")
    plt.xlabel("Time in audio recording (s)")
    processplt(a, plt, "costmap")
def testLossNotNaN(self):
    """The loss of a RhoCMPS model built on 'damped_sine' data is not NaN."""
    audio_batch = get_audio(None, 'damped_sine', hparams)
    rho_model = RhoCMPS(hparams, data_iterator=audio_batch)

    with self.cached_session() as sess:
        # Variables must be initialised before the loss can be evaluated.
        sess.run(tf.global_variables_initializer())
        loss_value = rho_model.loss.eval()
        loss_is_nan = np.isnan(loss_value)
        self.assertFalse(loss_is_nan)
def scoredata(request, name):
    """Return events, duration and note range of the score ``name`` as JSON.

    The audio for ``name`` is loaded first because its ``total_length`` is
    passed on to ``data.get_score``.  The three score properties are
    serialised with ``json.dumps`` and wrapped in an ``HttpResponse``.

    Args:
        request: Incoming request object; not used by this function.
        name: Identifier forwarded to ``data.get_audio`` / ``data.get_score``.

    Returns:
        An ``HttpResponse`` whose body is a JSON object with the keys
        ``events``, ``duration`` and ``note_range``.
    """
    # NOTE(review): no content type is passed to HttpResponse, so the
    # framework default applies -- confirm clients do not rely on
    # "application/json" before changing this.
    recording = data.get_audio(name, 0.0)
    score = data.get_score(name, recording.total_length)

    events = score.events()
    duration = score.length()
    note_range = score.note_range()
    payload = {
        'events': events,
        'duration': duration,
        'note_range': note_range,
    }

    body = json.dumps(payload)
    return HttpResponse(body)
def testRhoEvolvedWithDataRemainsNormalized(self):
    """The trace of the data-evolved rho stays equal to one.

    Builds a RhoCMPS model on 'damped_sine' data, evolves rho with the data
    and compares ``tf.trace`` of the result against a tensor of ones shaped
    like ``rho_out[:, :, 0, 0]`` (relative tolerance 1e-5).
    """
    audio_batch = get_audio(None, 'damped_sine', hparams)
    rho_model = RhoCMPS(hparams, data_iterator=audio_batch)
    evolved_rho = rho_model.rho_evolve_with_data()

    with self.cached_session() as sess:
        sess.run(tf.global_variables_initializer())
        traces = tf.trace(evolved_rho)
        # Slicing off the last two indices gives the shape the traces have.
        unit_traces = tf.ones_like(evolved_rho[:, :, 0, 0])
        self.assertAllClose(traces, unit_traces, rtol=1e-5)
def testPsiEvolvedWithDataRemainsNormalized(self):
    """The norm of the data-evolved psi stays equal to one.

    Builds a PsiCMPS model on 'damped_sine' data, evolves psi with the data
    and compares its norm along the last axis against a tensor of ones
    shaped like ``psi_out[:, :, 0]`` (relative tolerance 1e-5).
    """
    audio_batch = get_audio(None, 'damped_sine', hparams)
    psi_model = PsiCMPS(hparams, data_iterator=audio_batch)
    evolved_psi = psi_model.psi_evolve_with_data()

    with self.cached_session() as sess:
        sess.run(tf.global_variables_initializer())
        norms = tf.norm(evolved_psi, axis=-1)
        # Dropping the last index gives the shape the norms have.
        unit_norms = tf.ones_like(evolved_psi[:, :, 0])
        self.assertAllClose(norms, unit_norms, rtol=1e-5)
def align(name, lik_method, path_method, const_width=None, rel_width=None):
    """Return the alignment for ``name``, computing and saving it if needed.

    ``data.get_alignment`` is consulted first; when it returns an alignment
    that object is returned unchanged.  Otherwise the audio and score are
    loaded, an ``AlignmentProcessor`` runs the alignment, the ground-truth
    path is attached when ``data.get_truth`` returns a truthy result, and
    the named alignment is stored with ``data.save_alignment``.

    Args:
        name: Identifier of the piece, passed to the ``data`` lookups.
        lik_method: Passed to ``data.get_alignment`` and
            ``AlignmentProcessor``.
        path_method: Passed to ``data.get_alignment`` and
            ``AlignmentProcessor``.
        const_width: Forwarded to ``AlignmentProcessor.align``.
        rel_width: Forwarded to ``AlignmentProcessor.align``.

    Returns:
        The stored or freshly computed alignment object.
    """
    a = data.get_alignment(name, lik_method, path_method)
    # Identity check: ``== None`` would dispatch to a custom ``__eq__`` on
    # the alignment object, which is not what "nothing stored" means.
    if a is not None:
        return a

    audio = data.get_audio(name)
    score = data.get_score(name)
    proc = AlignmentProcessor(lik_method, path_method)
    a = proc.align(audio, score, const_width, rel_width)

    truth = data.get_truth(name, score, audio)
    if truth:
        a.truth_path = truth.get_path()

    a.set_name(name)
    data.save_alignment(a)
    return a
def testCorrectShape(self):
    """A 'damped_sine' batch has shape (minibatch_size, sample_duration)."""
    audio_batch = get_audio(None, 'damped_sine', hparams)

    with self.cached_session():
        actual_shape = audio_batch.eval().shape
        expected_shape = (hparams.minibatch_size, FLAGS.sample_duration)
        self.assertEqual(actual_shape, expected_shape)
def main(argv):
    """Build the training graph for the selected model and start training.

    Hyperparameters start from the defaults below and are overridden by
    ``FLAGS.hparams``.  The audio input, the model (``RhoCMPS`` when
    ``FLAGS.mps_model == 'rho_mps'``, otherwise ``PsiCMPS``), the regularised
    loss and the summaries are created, then an Adam optimiser is run through
    ``tf.contrib.training.train``.

    Args:
        argv: Command-line arguments; not used by this function.
    """
    # NOTE(review): the nesting of the variable scopes and of the two
    # FLAGS-guarded summary sections should be confirmed against history.

    # Earlier hyperparameter set, kept for reference:
    # hparams = HParams(minibatch_size=8, bond_dim=8, delta_t=1/FLAGS.sample_rate, sigma=0.000001,
    #                   h_reg=200/(np.pi * FLAGS.sample_rate)**2, r_reg=2000/(np.pi * FLAGS.sample_rate),
    #                   initial_rank=None, A=100., learning_rate=0.001)
    hparams = HParams(
        minibatch_size=8,
        bond_dim=8,
        delta_t=1 / FLAGS.sample_rate,
        sigma=0.0001,
        h_reg=200 / (np.pi * FLAGS.sample_rate)**2,
        r_reg=0.1,
        initial_rank=None,
        A=100.,
        learning_rate=0.001,
    )
    hparams.parse(FLAGS.hparams)

    with tf.variable_scope("data"):
        audio_batch = get_audio(
            datadir=FLAGS.datadir, dataset=FLAGS.dataset, hps=hparams)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        model_cls = RhoCMPS if FLAGS.mps_model == 'rho_mps' else PsiCMPS
        model = model_cls(hparams=hparams, data_iterator=audio_batch)

    # Squared L2 norms used as regularisation terms.
    freq_sq_norm = tf.reduce_sum(tf.square(model.freqs))
    r_sq_norm = tf.real(tf.reduce_sum(tf.conj(model.R) * model.R))

    with tf.variable_scope("total_loss"):
        total_loss = (model.loss + hparams.h_reg * freq_sq_norm
                      + hparams.r_reg * r_sq_norm)

    with tf.variable_scope("summaries"):
        tf.summary.scalar("A", tf.cast(model.A, dtype=tf.float32))
        tf.summary.scalar("sigma", tf.cast(model.sigma, dtype=tf.float32))
        tf.summary.scalar("h_l2norm", tf.sqrt(freq_sq_norm))
        tf.summary.scalar("r_l2norm", tf.sqrt(r_sq_norm))

        gr_rate = 2 * np.pi * hparams.sigma**2 * r_sq_norm / hparams.bond_dim
        tf.summary.scalar("gr_decay_time", 1 / gr_rate)

        # Losses are reshaped to rank 0 because scalar summaries need scalars.
        tf.summary.scalar("model_loss", tf.reshape(model.loss, []))
        tf.summary.scalar("total_loss", tf.reshape(total_loss, []))

        tf.summary.audio(
            "data", audio_batch, sample_rate=FLAGS.sample_rate, max_outputs=5)
        tf.summary.histogram("frequencies", model.freqs / (2 * np.pi))

        if FLAGS.visualize:
            # Doesn't work for Datasets where batch size can't be inferred
            plot_data = tfplot.autowrap(waveform_plot, batch=True)
            data_waveform_op = plot_data(
                audio_batch, hparams.minibatch_size * [hparams.delta_t])
            tf.summary.image("data_waveform", data_waveform_op)

        if FLAGS.num_samples != 0:
            samples = model.sample(FLAGS.num_samples, FLAGS.sample_duration)
            plot_samples = tfplot.autowrap(waveform_plot, batch=True)
            sample_waveform_op = plot_samples(
                samples, FLAGS.num_samples * [hparams.delta_t])
            tf.summary.image("sample_waveform", sample_waveform_op)

    step = tf.get_variable(
        "global_step", [], tf.int64, tf.zeros_initializer(), trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
    train_op = optimizer.minimize(total_loss, global_step=step)

    # TODO Unrolling in time?
    # One log directory per (bond_dim, delta_t, minibatch_size) combination.
    run_logdir = f"{FLAGS.logdir}/{hparams.bond_dim}_{hparams.delta_t}_{hparams.minibatch_size}"
    tf.contrib.training.train(
        train_op, save_checkpoint_secs=60, logdir=run_logdir)
#!/usr/bin/env python
# Load the score and audio for "mozart", look up the matching truth object
# and print the path it returns.
import data

score = data.get_score("mozart")
audio = data.get_audio("mozart")
truth = data.get_truth("mozart", score, audio)
# print() with one argument gives the same output on Python 2 and Python 3;
# the former print statement is a SyntaxError on Python 3.
# NOTE(review): other callers test ``truth`` for truthiness before using it;
# this script assumes truth data exists for "mozart".
print(truth.get_path())