def _assemble_batch(self, melody, chords, with_correct=True): encoded_melodies = [[] for _ in self.encodings] relative_posns = [[] for _ in self.encodings] correct_notes = [] chord_roots = [] chord_types = [] for m, c in zip(melody, chords): m = leadsheet.constrain_melody(m, self.bounds) for i, encoding in enumerate(self.encodings): e_m, r_p = encoding.encode_melody_and_position(m, c) encoded_melodies[i].append(e_m) relative_posns[i].append(r_p) correct_notes.append( Encoding.encode_absolute_melody(m, self.bounds.lowbound, self.bounds.highbound)) c_roots, c_types = zip(*c) chord_roots.append(c_roots) chord_types.append(c_types) retlist = [ np.array(chord_types, np.float32), np.array(chord_roots, np.int32) ] if with_correct: retlist.append(np.array(correct_notes, np.int32)) retlist.extend(np.array(x, np.int32) for x in relative_posns) retlist.extend(np.array(x, np.int32) for x in encoded_melodies) return retlist
def _build(det_dropout):
    """Build the symbolic loss for the multi-encoding (product) model.

    Each ``(encoding, lstmstack)`` pair produces its own output
    probabilities; these are multiplied elementwise and renormalized along
    axis 2 before being handed to ``Encoding.compute_loss``.

    Reads ``self``, ``encoded_melodies``, ``relative_posns``, ``chord_types``,
    ``chord_roots``, ``correct_notes``, ``n_batch`` and ``n_time`` from the
    enclosing scope.

    Args:
        det_dropout: forwarded unchanged as ``deterministic_dropout`` to each
            stack's ``do_preprocess_scan``.

    Returns:
        The result of ``Encoding.compute_loss(norm_out_probs, correct_notes,
        True)``.
    """
    all_out_probs = []
    for encoding, lstmstack, encoded_melody, relative_pos in zip(
            self.encodings, self.lstmstacks, encoded_melodies, relative_posns):
        activations = lstmstack.do_preprocess_scan(
            timestep=T.tile(T.arange(n_time), (n_batch, 1)),
            relative_position=relative_pos,
            cur_chord_type=chord_types,
            cur_chord_root=chord_roots,
            # The "last output" fed at each step is the encoded melody
            # shifted by one along axis 1, with the encoding's initial form
            # prepended.
            last_output=T.concatenate([
                T.tile(encoding.initial_encoded_form(), (n_batch, 1, 1)),
                encoded_melody[:, :-1, :],
            ], 1),
            deterministic_dropout=det_dropout)
        out_probs = encoding.decode_to_probs(
            activations, relative_pos,
            self.bounds.lowbound, self.bounds.highbound)
        all_out_probs.append(out_probs)

    # Elementwise product of the per-encoding distributions.
    reduced_out_probs = functools.reduce((lambda x, y: x * y), all_out_probs)

    if self.normalize_artic_only:
        # The first two entries on axis 2 are kept as-is; the remaining
        # entries are rescaled so that the whole axis sums to one.
        # NOTE(review): presumably the first two slots are the
        # non-articulating (rest/sustain) outputs -- confirm against Encoding.
        non_artic_probs = reduced_out_probs[:, :, :2]
        artic_probs = reduced_out_probs[:, :, 2:]
        non_artic_sum = T.sum(non_artic_probs, 2, keepdims=True)
        artic_sum = T.sum(artic_probs, 2, keepdims=True)
        # Floor the denominator exactly like the full-normalization branch
        # below, so an underflowed product cannot yield 0/0 = NaN.
        artic_sum = T.maximum(artic_sum, constants.EPSILON)
        norm_artic_probs = artic_probs * (1 - non_artic_sum) / artic_sum
        norm_out_probs = T.concatenate([non_artic_probs, norm_artic_probs], 2)
    else:
        normsum = T.sum(reduced_out_probs, 2, keepdims=True)
        normsum = T.maximum(normsum, constants.EPSILON)
        norm_out_probs = reduced_out_probs / normsum

    return Encoding.compute_loss(norm_out_probs, correct_notes, True)
def decode(self, chords, feat_strengths, feat_vects):
    """Decode melodies from feature strengths/vectors over given chords.

    Args:
        chords: iterable of chord progressions, each a sequence of
            ``(root, type)`` pairs.
        feat_strengths: passed through unchanged to ``self.decode_fun``.
        feat_vects: passed through unchanged to ``self.decode_fun``.

    Returns:
        A list with one decoded melody per sequence returned by
        ``self.decode_fun``.

    Raises:
        AssertionError: if ``self.decode_fun`` is still ``None``.
    """
    assert self.decode_fun is not None, "Need to call setup_decode before decode"

    # Split every progression into a (roots, types) pair of tuples.
    split = [tuple(zip(*progression)) for progression in chords]
    root_rows = [roots for roots, _ in split]
    type_rows = [kinds for _, kinds in split]

    chosen = self.decode_fun(
        np.array(root_rows, np.int32),
        np.array(type_rows, np.float32),
        feat_strengths,
        feat_vects)

    melodies = []
    for sequence in chosen:
        melodies.append(
            Encoding.decode_absolute_melody(
                sequence, self.bounds.lowbound, self.bounds.highbound))
    return melodies
def _scan_fn(*inputs):
    """Run one sampling step across all LSTM stacks (scan body).

    The flat ``inputs`` are split back into per-stack argument lists, each
    stack's sampling coroutine is advanced to produce output probabilities,
    the probabilities are multiplied and renormalized along axis 1, a note is
    sampled, and the encoded note is sent back into every coroutine.

    Reads ``specs``, ``lstmstacks``, ``encodings``, ``bounds``, ``srng`` and
    ``normalize_artic_only`` from the enclosing scope.

    Args:
        *inputs: laid out as ``[spec_sequences..., last_absolute_position,
            spec_taps..., spec_non_sequences...]``, where each ``...`` group
            holds the entries for every spec in order.

    Returns:
        ``[sampled_note, norm_out_probs]`` followed by the per-encoding
        probabilities and then every stack's scan outputs.
    """
    # inputs is [ spec_sequences..., last_absolute_position, spec_taps..., spec_non_sequences... ]
    remaining = list(inputs)

    def _take(count):
        # Remove and return the next ``count`` entries of the flat inputs.
        taken = remaining[:count]
        del remaining[:count]
        return taken

    partitioned_inputs = [_take(len(spec.sequences)) for spec in specs]
    last_absolute_chosen = remaining.pop(0)
    for cur_part, spec in zip(partitioned_inputs, specs):
        cur_part.extend(_take(spec.num_taps))
    for cur_part, spec in zip(partitioned_inputs, specs):
        cur_part.extend(_take(len(spec.non_sequences)))

    scan_routs = [
        lstmstack.sample_scan_routine(spec, *p_input)
        for lstmstack, spec, p_input in zip(lstmstacks, specs, partitioned_inputs)
    ]

    new_posns = []
    all_out_probs = []
    for scan_rout, encoding in zip(scan_routs, encodings):
        # First resume: the coroutine yields its previous position/output.
        last_rel_pos, last_out, cur_kwargs = scan_rout.send(None)
        new_pos = encoding.get_new_relative_position(
            last_absolute_chosen, last_rel_pos, last_out,
            bounds.lowbound, bounds.highbound, **cur_kwargs)
        new_posns.append(new_pos)
        addtl_kwargs = {"last_output": last_out}
        # Second resume: hand over the new position, receive activations.
        out_activations = scan_rout.send((new_pos, addtl_kwargs))
        out_probs = encoding.decode_to_probs(
            out_activations, new_pos, bounds.lowbound, bounds.highbound)
        all_out_probs.append(out_probs)

    # Elementwise product of the per-encoding distributions.
    reduced_out_probs = functools.reduce((lambda x, y: x * y), all_out_probs)

    if normalize_artic_only:
        # Keep the first two entries on axis 1 and rescale the rest so the
        # axis sums to one.
        non_artic_probs = reduced_out_probs[:, :2]
        artic_probs = reduced_out_probs[:, 2:]
        non_artic_sum = T.sum(non_artic_probs, 1, keepdims=True)
        artic_sum = T.sum(artic_probs, 1, keepdims=True)
        # Floor the denominator exactly like the full-normalization branch
        # below, so an underflowed product cannot yield 0/0 = NaN before
        # sampling.
        artic_sum = T.maximum(artic_sum, constants.EPSILON)
        norm_artic_probs = artic_probs * (1 - non_artic_sum) / artic_sum
        norm_out_probs = T.concatenate([non_artic_probs, norm_artic_probs], 1)
    else:
        normsum = T.sum(reduced_out_probs, 1, keepdims=True)
        normsum = T.maximum(normsum, constants.EPSILON)
        norm_out_probs = reduced_out_probs / normsum

    sampled_note = Encoding.sample_absolute_probs(srng, norm_out_probs)

    outputs = []
    for scan_rout, encoding, new_pos in zip(scan_routs, encodings, new_posns):
        encoded_output = encoding.note_to_encoding(
            sampled_note, new_pos, bounds.lowbound, bounds.highbound)
        # Final resume: feed the chosen note back, collect the scan outputs.
        scan_outputs = scan_rout.send(encoded_output)
        scan_rout.close()
        outputs.extend(scan_outputs)

    return [sampled_note, norm_out_probs] + all_out_probs + outputs
def generate(self, chords):
    """Generate one melody per chord progression.

    Args:
        chords: iterable of chord progressions, each a sequence of
            ``(root, type)`` pairs.

    Returns:
        A list with one decoded melody per sequence returned by
        ``self.generate_fun``.

    Raises:
        AssertionError: if ``self.generate_fun`` is still ``None``.
    """
    assert self.generate_fun is not None, "Need to call setup_generate before generate"

    root_rows, type_rows = [], []
    for progression in chords:
        # Unzip the (root, type) pairs into two parallel tuples.
        roots, kinds = zip(*progression)
        root_rows.append(roots)
        type_rows.append(kinds)

    root_array = np.array(root_rows, np.int32)
    type_array = np.array(type_rows, np.float32)
    chosen = self.generate_fun(root_array, type_array)

    return [
        Encoding.decode_absolute_melody(
            sequence, self.bounds.lowbound, self.bounds.highbound)
        for sequence in chosen
    ]
def _build(det_dropout):
    """Build the symbolic loss for the single-encoding model.

    Reads ``self``, ``encoded_melody``, ``relative_pos``, ``chord_types``,
    ``chord_roots``, ``correct_notes``, ``n_batch`` and ``n_time`` from the
    enclosing scope.

    Args:
        det_dropout: forwarded unchanged as ``deterministic_dropout`` to
            ``self.lstmstack.do_preprocess_scan``.

    Returns:
        The result of ``Encoding.compute_loss(out_probs, correct_notes,
        True)``.
    """
    timesteps = T.tile(T.arange(n_time), (n_batch, 1))

    # Previous-step outputs: the encoded melody shifted by one along axis 1,
    # with the encoding's initial form prepended.
    initial_rows = T.tile(self.encoding.initial_encoded_form(), (n_batch, 1, 1))
    shifted_melody = encoded_melody[:, :-1, :]
    previous_outputs = T.concatenate([initial_rows, shifted_melody], 1)

    stack_activations = self.lstmstack.do_preprocess_scan(
        timestep=timesteps,
        relative_position=relative_pos,
        cur_chord_type=chord_types,
        cur_chord_root=chord_roots,
        last_output=previous_outputs,
        deterministic_dropout=det_dropout)

    probabilities = self.encoding.decode_to_probs(
        stack_activations,
        relative_pos,
        self.bounds.lowbound,
        self.bounds.highbound)

    return Encoding.compute_loss(probabilities, correct_notes, True)
def generate_visualize(self, chords):
    """Generate melodies and also return the raw choices and probabilities.

    Args:
        chords: iterable of chord progressions, each a sequence of
            ``(root, type)`` pairs.

    Returns:
        A tuple ``(melody, chosen, all_probs)`` where ``chosen`` and
        ``all_probs`` are the two values returned by
        ``self.generate_visualize_fun`` and ``melody`` holds one decoded
        melody per entry of ``chosen``.

    Raises:
        AssertionError: if ``self.generate_visualize_fun`` is missing or
            ``None``.
    """
    # Guard the function that is actually called below (the previous check
    # looked at ``generate_fun``, which this method never uses).
    assert getattr(self, "generate_visualize_fun", None) is not None, \
        "Need to call setup_generate before generate_visualize"

    chord_roots = []
    chord_types = []
    for c in chords:
        # Unzip the (root, type) pairs into two parallel tuples.
        c_roots, c_types = zip(*c)
        chord_roots.append(c_roots)
        chord_types.append(c_types)

    # Convert to typed arrays, matching what generate() and
    # decode_visualize() pass to their compiled functions.
    chosen, all_probs = self.generate_visualize_fun(
        np.array(chord_roots, np.int32),
        np.array(chord_types, np.float32))

    melody = [
        Encoding.decode_absolute_melody(
            c, self.bounds.lowbound, self.bounds.highbound)
        for c in chosen
    ]
    return melody, chosen, all_probs
def generate_visualize(self, chords):
    """Generate melodies and return choices, probabilities and extra outputs.

    Args:
        chords: iterable of chord progressions, each a sequence of
            ``(root, type)`` pairs.

    Returns:
        A tuple ``(melody, chosen, all_probs, extras)``. ``chosen`` and
        ``all_probs`` are the first two values returned by
        ``self.generate_visualize_fun``, ``extras`` is everything after
        them, and ``melody`` holds one decoded melody per entry of
        ``chosen``.

    Raises:
        AssertionError: if ``self.generate_visualize_fun`` is missing or
            ``None``.
    """
    # Guard the function that is actually called below (the previous check
    # looked at ``generate_fun``, which this method never uses).
    assert getattr(self, "generate_visualize_fun", None) is not None, \
        "Need to call setup_generate before generate_visualize"

    chord_roots = []
    chord_types = []
    for c in chords:
        # Unzip the (root, type) pairs into two parallel tuples.
        c_roots, c_types = zip(*c)
        chord_roots.append(c_roots)
        chord_types.append(c_types)

    # Convert to typed arrays, matching what generate() and
    # decode_visualize() pass to their compiled functions.
    stuff = self.generate_visualize_fun(
        np.array(chord_roots, np.int32),
        np.array(chord_types, np.float32))
    chosen, all_probs = stuff[:2]

    melody = [
        Encoding.decode_absolute_melody(
            c, self.bounds.lowbound, self.bounds.highbound)
        for c in chosen
    ]
    return melody, chosen, all_probs, stuff[2:]
def decode_visualize(self, chords, feat_strengths, feat_vects):
    """Decode melodies and return choices, probabilities and extra outputs.

    Args:
        chords: iterable of chord progressions, each a sequence of
            ``(root, type)`` pairs.
        feat_strengths: passed through unchanged to
            ``self.decode_visualize_fun``.
        feat_vects: passed through unchanged to
            ``self.decode_visualize_fun``.

    Returns:
        A tuple ``(melody, chosen, all_probs, extras)``. ``chosen`` and
        ``all_probs`` are the first two values returned by
        ``self.decode_visualize_fun`` and ``extras`` is the remainder.

    Raises:
        AssertionError: if ``self.decode_visualize_fun`` is still ``None``.
    """
    assert self.decode_visualize_fun is not None, "Need to call setup_decode before decode_visualize"

    root_rows, type_rows = [], []
    # Each zip(*progression) yields exactly a roots tuple and a types tuple.
    for roots, kinds in (zip(*progression) for progression in chords):
        root_rows.append(roots)
        type_rows.append(kinds)

    results = self.decode_visualize_fun(
        np.array(root_rows, np.int32),
        np.array(type_rows, np.float32),
        feat_strengths,
        feat_vects)
    chosen, all_probs = results[:2]

    melody = []
    for sequence in chosen:
        melody.append(
            Encoding.decode_absolute_melody(
                sequence, self.bounds.lowbound, self.bounds.highbound))

    return melody, chosen, all_probs, results[2:]
def decode_visualize(self, chords, feat_strengths, feat_vects):
    """Run the visualizing decoder and decode its chosen note sequences.

    Args:
        chords: iterable of chord progressions, each a sequence of
            ``(root, type)`` pairs.
        feat_strengths: forwarded as-is to ``self.decode_visualize_fun``.
        feat_vects: forwarded as-is to ``self.decode_visualize_fun``.

    Returns:
        ``(melody, chosen, all_probs, extras)``: the decoded melodies, the
        first two outputs of ``self.decode_visualize_fun``, and a slice with
        any remaining outputs.

    Raises:
        AssertionError: if ``self.decode_visualize_fun`` is still ``None``.
    """
    assert self.decode_visualize_fun is not None, "Need to call setup_decode before decode_visualize"

    # Split every progression into a (roots, types) pair of tuples.
    unzipped = [tuple(zip(*progression)) for progression in chords]
    roots_per_piece = [roots for roots, _ in unzipped]
    types_per_piece = [kinds for _, kinds in unzipped]

    root_array = np.array(roots_per_piece, np.int32)
    type_array = np.array(types_per_piece, np.float32)
    outputs = self.decode_visualize_fun(
        root_array, type_array, feat_strengths, feat_vects)

    chosen, all_probs = outputs[:2]
    melody = [
        Encoding.decode_absolute_melody(
            notes, self.bounds.lowbound, self.bounds.highbound)
        for notes in chosen
    ]
    return melody, chosen, all_probs, outputs[2:]
def _assemble_batch(self, melody, chords): encoded_melody = [] relative_pos = [] correct_notes = [] chord_roots = [] chord_types = [] for m,c in zip(melody,chords): m = leadsheet.constrain_melody(m, self.bounds) e_m, r_p = self.encoding.encode_melody_and_position(m,c) encoded_melody.append(e_m) relative_pos.append(r_p) correct_notes.append(Encoding.encode_absolute_melody(m, self.bounds.lowbound, self.bounds.highbound)) c_roots, c_types = zip(*c) chord_roots.append(c_roots) chord_types.append(c_types) return (np.array(chord_types, np.float32), np.array(chord_roots, np.int32), np.array(relative_pos, np.int32), np.array(encoded_melody, np.float32), np.array(correct_notes, np.int32))
def _assemble_batch(self, melody, chords): encoded_melodies = [[] for _ in self.encodings] relative_posns = [[] for _ in self.encodings] correct_notes = [] chord_roots = [] chord_types = [] for m,c in zip(melody,chords): m = leadsheet.constrain_melody(m, self.bounds) for i,encoding in enumerate(self.encodings): e_m, r_p = encoding.encode_melody_and_position(m,c) encoded_melodies[i].append(e_m) relative_posns[i].append(r_p) correct_notes.append(Encoding.encode_absolute_melody(m, self.bounds.lowbound, self.bounds.highbound)) c_roots, c_types = zip(*c) chord_roots.append(c_roots) chord_types.append(c_types) return ([np.array(chord_types, np.float32), np.array(chord_roots, np.int32), np.array(correct_notes, np.int32)] + [np.array(x, np.int32) for x in relative_posns] + [np.array(x, np.int32) for x in encoded_melodies])
def _scan_fn(*inputs):
    """Run one sampling step for the single-stack model (scan body).

    The previously chosen absolute note is pulled out of the flat inputs,
    the stack's sampling coroutine is advanced to obtain output
    probabilities, a note is sampled, and its encoding is sent back into
    the coroutine.

    Reads ``self`` and ``spec`` from the enclosing scope.

    Args:
        *inputs: laid out as ``[spec_sequences..., last_absolute_position,
            spec_taps..., spec_non_sequences...]``.

    Returns:
        ``[sampled_note, out_probs]`` followed by the coroutine's scan
        outputs.
    """
    # inputs is [ spec_sequences..., last_absolute_position, spec_taps..., spec_non_sequences... ]
    split_at = len(spec.sequences)
    last_absolute_chosen = inputs[split_at]
    # Everything except the previously chosen note goes to the coroutine.
    routine_args = list(inputs[:split_at]) + list(inputs[split_at + 1:])
    routine = self.lstmstack.sample_scan_routine(spec, *routine_args)

    # First resume: the coroutine yields its previous position/output.
    previous_position, previous_output, position_kwargs = routine.send(None)

    low = self.bounds.lowbound
    high = self.bounds.highbound
    position = self.encoding.get_new_relative_position(
        last_absolute_chosen, previous_position, previous_output,
        low, high, **position_kwargs)

    # Second resume: hand over the new position, receive activations.
    activations = routine.send((position, {"last_output": previous_output}))
    out_probs = self.encoding.decode_to_probs(activations, position, low, high)

    sampled_note = Encoding.sample_absolute_probs(self.srng, out_probs)
    encoded_note = self.encoding.note_to_encoding(
        sampled_note, position, low, high)

    # Final resume: feed the chosen note back, collect the scan outputs.
    routine_outputs = routine.send(encoded_note)
    routine.close()

    return [sampled_note, out_probs] + routine_outputs
def _build(det_dropout):
    """Build the symbolic autoencoder loss, info dict and shared updates.

    Encoder stacks are summed into one activation tensor that the queue
    manager turns into features; decoder stacks then produce per-encoding
    probabilities that are multiplied and renormalized along axis 2. The
    reconstruction and queue losses are combined according to
    ``self.loss_mode``.

    Reads ``self``, ``encoded_melodies``, ``relative_posns``, ``chord_types``,
    ``chord_roots``, ``correct_notes``, ``n_batch`` and ``n_time`` from the
    enclosing scope.

    Args:
        det_dropout: forwarded unchanged as ``deterministic_dropout`` to every
            stack's ``do_preprocess_scan``.

    Returns:
        A tuple ``(full_loss, full_info, updates)``.

    Raises:
        ValueError: if ``self.loss_mode`` is not one of ``"add"``,
            ``"priority"``, ``"cutoff"`` or ``"trigger"``.
    """
    # ---- Encoder: sum the activations of every encoder stack. ----
    all_activations = []
    for _encoding, enc_lstmstack, encoded_melody, relative_pos in zip(
            self.encodings, self.enc_lstmstacks, encoded_melodies, relative_posns):
        activations = enc_lstmstack.do_preprocess_scan(
            timestep=T.tile(T.arange(n_time), (n_batch, 1)),
            relative_position=relative_pos,
            cur_chord_type=chord_types,
            cur_chord_root=chord_roots,
            cur_input=encoded_melody,
            deterministic_dropout=det_dropout)
        all_activations.append(activations)
    reduced_activations = functools.reduce((lambda x, y: x + y), all_activations)

    queue_loss, feat_strengths, feat_vects, queue_info = self.qman.process(
        reduced_activations, extra_info=True)
    features = QueueManager.queue_transform(feat_strengths, feat_vects)

    # ---- Decoder: multiply the per-encoding probabilities. ----
    all_out_probs = []
    for encoding, dec_lstmstack, encoded_melody, relative_pos in zip(
            self.encodings, self.dec_lstmstacks, encoded_melodies, relative_posns):
        activations = dec_lstmstack.do_preprocess_scan(
            timestep=T.tile(T.arange(n_time), (n_batch, 1)),
            relative_position=relative_pos,
            cur_chord_type=chord_types,
            cur_chord_root=chord_roots,
            cur_feature=features,
            # Previous-step outputs: the encoded melody shifted by one along
            # axis 1, with the encoding's initial form prepended.
            last_output=T.concatenate([
                T.tile(encoding.initial_encoded_form(), (n_batch, 1, 1)),
                encoded_melody[:, :-1, :],
            ], 1),
            deterministic_dropout=det_dropout)
        out_probs = encoding.decode_to_probs(
            activations, relative_pos,
            self.bounds.lowbound, self.bounds.highbound)
        all_out_probs.append(out_probs)
    reduced_out_probs = functools.reduce((lambda x, y: x * y), all_out_probs)
    normsum = T.sum(reduced_out_probs, 2, keepdims=True)
    # Floor the normalizer so an underflowed product cannot divide by zero.
    normsum = T.maximum(normsum, constants.EPSILON)
    norm_out_probs = reduced_out_probs / normsum

    reconstruction_loss, reconstruction_info = Encoding.compute_loss(
        norm_out_probs, correct_notes, extra_info=True)
    queue_surrogate_loss_parts = self.qman.surrogate_loss(
        reconstruction_loss, queue_info)

    updates = []
    full_info = queue_info.copy()
    full_info.update(reconstruction_info)
    full_info["queue_loss"] = queue_loss
    full_info["reconstruction_loss"] = reconstruction_loss

    float_n_batch = T.cast(n_batch, 'float32')
    # Compare mode names by value: ``is`` on string literals only works when
    # both strings happen to be interned.
    loss_mode = self.loss_mode
    if loss_mode == "add":
        full_loss = queue_loss + reconstruction_loss
    elif loss_mode == "priority":
        curviness = np.array(self.loss_mode_params[0], np.float32) * float_n_batch
        # ln( e^x + e^y - 1 )
        # ln( C(e^x + e^y - 1) ) - ln(C)
        # ln( e^c(e^x + e^y - 1) ) - c
        # ln( e^(x+c) + e^(y+c) - e^c ) - c
        # ln( e^(x-c) + e^(y-c) - e^(-c) ) + c
        # Now let c = maximum(x,y), d = minimum(x,y). WOLOG replace x=c, y=d
        # ln( e^(c-c) + e^(d-c) - e^(-c) ) + c
        # ln( 1 + e^(d-c) - e^(-c) ) + c
        x = reconstruction_loss / curviness
        y = queue_loss / curviness
        c = T.maximum(x, y)
        d = T.minimum(x, y)
        full_loss = (T.log(1 + T.exp(d - c) - T.exp(-c)) + c) * curviness
    elif loss_mode == "cutoff":
        # The queue loss only counts once the reconstruction loss is below
        # the (batch-scaled) cutoff.
        cutoff_val = np.array(self.loss_mode_params[0], np.float32)
        full_loss = T.switch(
            reconstruction_loss < cutoff_val * float_n_batch,
            reconstruction_loss + queue_loss,
            reconstruction_loss)
    elif loss_mode == "trigger":
        trigger_val = np.array(self.loss_mode_params[0], np.float32)
        trigger_speed = np.array(1.0 / self.loss_mode_params[1], np.float32)
        trigger_is_on = theano.shared(np.array(0, np.int8))
        trigger_scale = theano.shared(np.array(0.0, np.float32))
        full_loss = reconstruction_loss + trigger_scale * queue_loss
        # The flag is OR-ed with itself, so once set it stays set.
        updates.append((
            trigger_is_on,
            T.or_(trigger_is_on,
                  reconstruction_loss < trigger_val * float_n_batch)))
        # While the flag is set the scale grows by trigger_speed per update,
        # capped at 1.0; otherwise it is held at 0.0.
        updates.append((
            trigger_scale,
            T.switch(
                trigger_is_on,
                T.minimum(trigger_scale + trigger_speed,
                          np.array(1.0, np.float32)),
                np.array(0.0, np.float32))))
        full_info["trigger_scale"] = trigger_scale
    else:
        # Previously an unknown mode fell through and failed later with an
        # unbound ``full_loss``; fail here with a clear message instead.
        raise ValueError("Unknown loss_mode: {!r}".format(loss_mode))

    if queue_surrogate_loss_parts is not None:
        surrogate_loss, addtl_updates = queue_surrogate_loss_parts
        full_loss = full_loss + surrogate_loss
        updates.extend(addtl_updates)
        full_info["surrogate_loss"] = surrogate_loss

    return full_loss, full_info, updates
def _build(det_dropout):
    """Build the symbolic autoencoder loss, info dict and shared updates.

    The encoder stacks' activations are summed and passed through
    ``self.qman`` to obtain features; the decoder stacks consume those
    features and yield per-encoding probabilities, which are multiplied and
    renormalized along axis 2. ``self.loss_mode`` selects how the
    reconstruction loss and queue loss are merged.

    Reads ``self``, ``encoded_melodies``, ``relative_posns``, ``chord_types``,
    ``chord_roots``, ``correct_notes``, ``n_batch`` and ``n_time`` from the
    enclosing scope.

    Args:
        det_dropout: forwarded unchanged as ``deterministic_dropout`` to every
            stack's ``do_preprocess_scan``.

    Returns:
        A tuple ``(full_loss, full_info, updates)``.

    Raises:
        ValueError: if ``self.loss_mode`` is not one of ``"add"``,
            ``"priority"``, ``"cutoff"`` or ``"trigger"``.
    """
    # Encoder pass: one activation tensor per encoder stack, summed.
    all_activations = []
    for _encoding, enc_lstmstack, encoded_melody, relative_pos in zip(
            self.encodings, self.enc_lstmstacks, encoded_melodies,
            relative_posns):
        activations = enc_lstmstack.do_preprocess_scan(
            timestep=T.tile(T.arange(n_time), (n_batch, 1)),
            relative_position=relative_pos,
            cur_chord_type=chord_types,
            cur_chord_root=chord_roots,
            cur_input=encoded_melody,
            deterministic_dropout=det_dropout)
        all_activations.append(activations)
    reduced_activations = functools.reduce((lambda x, y: x + y),
                                           all_activations)

    queue_loss, feat_strengths, feat_vects, queue_info = self.qman.process(
        reduced_activations, extra_info=True)
    features = QueueManager.queue_transform(feat_strengths, feat_vects)

    # Decoder pass: one probability tensor per decoder stack, multiplied.
    all_out_probs = []
    for encoding, dec_lstmstack, encoded_melody, relative_pos in zip(
            self.encodings, self.dec_lstmstacks, encoded_melodies,
            relative_posns):
        activations = dec_lstmstack.do_preprocess_scan(
            timestep=T.tile(T.arange(n_time), (n_batch, 1)),
            relative_position=relative_pos,
            cur_chord_type=chord_types,
            cur_chord_root=chord_roots,
            cur_feature=features,
            # Previous-step outputs: the encoded melody shifted by one along
            # axis 1, with the encoding's initial form prepended.
            last_output=T.concatenate([
                T.tile(encoding.initial_encoded_form(), (n_batch, 1, 1)),
                encoded_melody[:, :-1, :]
            ], 1),
            deterministic_dropout=det_dropout)
        out_probs = encoding.decode_to_probs(activations, relative_pos,
                                             self.bounds.lowbound,
                                             self.bounds.highbound)
        all_out_probs.append(out_probs)
    reduced_out_probs = functools.reduce((lambda x, y: x * y), all_out_probs)
    normsum = T.sum(reduced_out_probs, 2, keepdims=True)
    # Floor the normalizer so an underflowed product cannot divide by zero.
    normsum = T.maximum(normsum, constants.EPSILON)
    norm_out_probs = reduced_out_probs / normsum

    reconstruction_loss, reconstruction_info = Encoding.compute_loss(
        norm_out_probs, correct_notes, extra_info=True)
    queue_surrogate_loss_parts = self.qman.surrogate_loss(
        reconstruction_loss, queue_info)

    updates = []
    full_info = queue_info.copy()
    full_info.update(reconstruction_info)
    full_info["queue_loss"] = queue_loss
    full_info["reconstruction_loss"] = reconstruction_loss

    float_n_batch = T.cast(n_batch, 'float32')
    # String modes are compared with ``==``; identity (``is``) on string
    # literals depends on interning and is not a reliable equality test.
    if self.loss_mode == "add":
        full_loss = queue_loss + reconstruction_loss
    elif self.loss_mode == "priority":
        curviness = np.array(self.loss_mode_params[0],
                             np.float32) * float_n_batch
        # ln( e^x + e^y - 1 )
        # ln( C(e^x + e^y - 1) ) - ln(C)
        # ln( e^c(e^x + e^y - 1) ) - c
        # ln( e^(x+c) + e^(y+c) - e^c ) - c
        # ln( e^(x-c) + e^(y-c) - e^(-c) ) + c
        # Now let c = maximum(x,y), d = minimum(x,y). WOLOG replace x=c, y=d
        # ln( e^(c-c) + e^(d-c) - e^(-c) ) + c
        # ln( 1 + e^(d-c) - e^(-c) ) + c
        x = reconstruction_loss / curviness
        y = queue_loss / curviness
        c = T.maximum(x, y)
        d = T.minimum(x, y)
        full_loss = (T.log(1 + T.exp(d - c) - T.exp(-c)) + c) * curviness
    elif self.loss_mode == "cutoff":
        # The queue loss only counts once the reconstruction loss is below
        # the (batch-scaled) cutoff.
        cutoff_val = np.array(self.loss_mode_params[0], np.float32)
        full_loss = T.switch(
            reconstruction_loss < cutoff_val * float_n_batch,
            reconstruction_loss + queue_loss, reconstruction_loss)
    elif self.loss_mode == "trigger":
        trigger_val = np.array(self.loss_mode_params[0], np.float32)
        trigger_speed = np.array(1.0 / self.loss_mode_params[1], np.float32)
        trigger_is_on = theano.shared(np.array(0, np.int8))
        trigger_scale = theano.shared(np.array(0.0, np.float32))
        full_loss = reconstruction_loss + trigger_scale * queue_loss
        # The flag is OR-ed with itself, so once set it stays set.
        updates.append(
            (trigger_is_on,
             T.or_(trigger_is_on,
                   reconstruction_loss < trigger_val * float_n_batch)))
        # While the flag is set the scale grows by trigger_speed per update,
        # capped at 1.0; otherwise it is held at 0.0.
        updates.append((trigger_scale, T.switch(
            trigger_is_on,
            T.minimum(trigger_scale + trigger_speed,
                      np.array(1.0, np.float32)),
            np.array(0.0, np.float32))))
        full_info["trigger_scale"] = trigger_scale
    else:
        # An unknown mode used to surface later as an unbound ``full_loss``;
        # report it explicitly instead.
        raise ValueError("Unknown loss_mode: {!r}".format(self.loss_mode))

    if queue_surrogate_loss_parts is not None:
        surrogate_loss, addtl_updates = queue_surrogate_loss_parts
        full_loss = full_loss + surrogate_loss
        updates.extend(addtl_updates)
        full_info["surrogate_loss"] = surrogate_loss

    return full_loss, full_info, updates