def get_note_from_softmax(self, softmax):
  """Extracts a one-hot encoding of the most probable note.

  Args:
    softmax: Softmax probabilities over possible next notes.
  Returns:
    One-hot encoding of most probable note.
  """
  note_idx = np.argmax(softmax)
  note_enc = rl_tuner_ops.make_onehot([note_idx], rl_tuner_ops.NUM_CLASSES)
  return np.reshape(note_enc, (rl_tuner_ops.NUM_CLASSES,))
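# A minimal standalone sketch of the same argmax-to-one-hot step, assuming
# only numpy; the softmax vector below is made up for illustration and
# num_classes mirrors rl_tuner_ops.NUM_CLASSES (38).
def _demo_note_from_softmax():
  num_classes = 38
  rng = np.random.default_rng(0)
  softmax = rng.dirichlet(np.ones(num_classes))  # fake softmax output
  note_enc = np.zeros(num_classes)
  note_enc[np.argmax(softmax)] = 1.0  # one-hot of the most probable note
  return note_enc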
def fitness(self, should_print=False):
  """Scores this individual's melody with the RL Tuner reward functions.

  Walks the note sequence through the tuner's composition state, collecting
  a music-theory reward and a reward-RNN reward per note, and returns the
  sum of the two means. When use_rl is False, the score is instead the
  negated VAE distance to the individual's generated MIDI file.
  """
  use_rl = True
  if not use_rl:
    midi_file = "output/" + str(self.ind) + ".mid"
    return -abs(self.vae.get_distance(midi_file, self.ind))

  self.rlt.num_notes_in_melody = self.number_of_notes
  self.rlt.reset_composition()
  note_rewards = []
  rnn_rewards = []
  for note in self.sequence:
    one_hot = np.array(rl_tuner_ops.make_onehot(
        [note.bit.pitch], rl_tuner_ops.NUM_CLASSES)).flatten()
    # Music-theory reward for this note in the current composition context.
    note_rewards.append(self.rlt.reward_music_theory(one_hot))
    self.rlt.composition.append(np.argmax(one_hot))
    self.rlt.beat += 1
    # Reward-RNN reward: how plausible the trained Note RNN finds this note.
    _, _, reward_scores = self.rlt.action(one_hot)
    reward_scores = np.reshape(reward_scores, (rl_tuner_ops.NUM_CLASSES,))
    rnn_rewards.append(self.rlt.reward_from_reward_rnn_scores(
        one_hot, reward_scores))
    if should_print:
      print(one_hot, note_rewards[-1], rnn_rewards[-1])
  return np.mean(rnn_rewards) + np.mean(note_rewards)
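# Hypothetical usage sketch: fitness() is the scalar an evolutionary loop
# would maximize. `population` (a list of individuals exposing fitness())
# is an assumption for illustration, not defined in this file.
def _select_fittest(population, k=10):
  """Returns the k individuals with the highest combined reward."""
  return sorted(population, key=lambda ind: ind.fitness(), reverse=True)[:k]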
def testRewardNetwork(self):
  graph = tf.Graph()
  self.session = tf.Session(graph=graph)
  note_rnn = note_rnn_loader.NoteRNNLoader(
      graph, scope='test', checkpoint_dir=None)
  note_rnn.initialize_new(self.session)

  rlt = rl_tuner.RLTuner(self.output_dir,
                         note_rnn_checkpoint_dir=self.checkpoint_dir)

  # Prime the internal models and inspect the reward of the first note.
  initial_note = rlt.prime_internal_models()
  print("INITIAL NOTE", initial_note)
  print("FIRST SCORE", rlt.reward_key(initial_note))

  action = rlt.action(initial_note, 100, enable_random=True)
  print("ACTION CHOSEN", action[0])
  print("ACTION REWARD", action[1])
  print("ACTION NEXT OBS", action[2])
  print("FINAL", rlt.reward_key(action[2]))

  print("ONE HOT CREATED")
  x = np.array(rl_tuner_ops.make_onehot([10], 24)).flatten()
  print(x)
  print("FINAL", rlt.reward_key(x))

  # Let the model compose a short melody, scoring each chosen note with the
  # music-theory reward.
  last_observation = rlt.prime_internal_models()
  rlt.num_notes_in_melody = 12
  rlt.reset_composition()
  for _ in range(rlt.num_notes_in_melody):
    _, new_observation, reward_scores = rlt.action(
        last_observation, 0, enable_random=False, sample_next_obs=False)
    music_theory_reward = rlt.reward_music_theory(new_observation)
    print(music_theory_reward)
    print(new_observation)
    rlt.composition.append(np.argmax(new_observation))
    rlt.beat += 1
    last_observation = new_observation
  print("num note", rlt.num_notes_in_melody)

  def score_melody(note_indices, label):
    """Scores a fixed melody of one-hot notes with the music-theory reward."""
    rlt.reset_composition()
    rewards = []
    print(label)
    for idx in note_indices:
      one_hot = np.array(rl_tuner_ops.make_onehot([idx], 12)).flatten()
      reward = rlt.reward_music_theory(one_hot)
      print(one_hot, reward)
      rlt.composition.append(idx)
      rlt.beat += 1
      rewards.append(reward)
      rlt.music_theory_reward_last_n += reward * rlt.reward_scaler
    print("AVG", np.mean(rewards))

  # First an ascending chromatic run, then a more varied melody.
  score_melody(list(range(12)), "BEGIN")
  score_melody([0, 4, 5, 4, 2, 0, 0, 11, 9, 1, 10, 0], "SECOND")
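# A minimal sketch of the one-hot helper these tests lean on, assuming only
# numpy; it mirrors what rl_tuner_ops.make_onehot returns for these inputs
# (one one-hot row per index).
def _demo_make_onehot(int_list, one_hot_length):
  """One one-hot row of length one_hot_length per index in int_list."""
  return np.eye(one_hot_length)[int_list].tolist()

# Example: _demo_make_onehot([10], 24) yields a single row whose only
# nonzero entry is at position 10, matching the `x` vector built above.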