def _quantized_subsequence_to_tensors(self, quantized_subsequence):
    """Encode a quantized subsequence as per-chunk, per-track one-hot tensors.

    Each extracted instrument track is set to `self._max_steps_truncate`
    steps and split into chunks of
    `self._steps_per_bar * self._chunk_size_bars` steps. Every track chunk is
    encoded as a program token, followed by its performance events, followed
    by the end token. When `self._chord_encoding` is set, a parallel chord
    control sequence is encoded for every track chunk as well.

    Args:
      quantized_subsequence: The quantized sequence to convert. Its `notes`
        are inspected directly, and it is handed to
        `mm.extract_performances` and `chords_lib.event_list_chords`.

    Returns:
      A `(chunk_tensors, chunk_chord_tensors)` tuple. `chunk_tensors` holds
      one list per chunk containing one encoded array per track.
      `chunk_chord_tensors` holds the matching chord arrays and stays empty
      when chord encoding is disabled. `([], [])` is returned whenever the
      subsequence is rejected.
    """
    # A single note outside the supported pitch range rejects the sequence.
    for note in quantized_subsequence.notes:
        if note.pitch < self._min_pitch or note.pitch > self._max_pitch:
            return [], []

    # Pull out one performance per instrument.
    tracks, _ = mm.extract_performances(
        quantized_subsequence,
        max_steps_truncate=self._max_steps_truncate,
        num_velocity_bins=self._num_velocity_bins,
        split_instruments=True)

    # The instrument count has to fall inside the configured bounds.
    num_tracks = len(tracks)
    if (num_tracks < self._min_num_instruments or
            num_tracks > self._max_num_instruments):
        return [], []

    # Order by program number; drum tracks (is_drum=True) sort last.
    def program_order(performance):
        return (performance.is_drum, performance.program)

    tracks = sorted(tracks, key=program_order)

    chunk_size_steps = self._steps_per_bar * self._chunk_size_bars
    tracks_by_chunk = [[] for _ in range(self._max_num_chunks)]
    total_events = 0

    for track in tracks:
        # Bring the track to the expected number of time steps.
        track.set_length(self._max_steps_truncate)

        # Factory handed to split_performance; `track=track` binds the
        # current loop value at definition time.
        def new_performance(quantized_sequence, start_step, track=track):
            if quantized_sequence is None:
                steps_per_quarter = self._steps_per_quarter
            else:
                steps_per_quarter = None
            return performance_lib.MetricPerformance(
                quantized_sequence=quantized_sequence,
                steps_per_quarter=steps_per_quarter,
                start_step=start_step,
                num_velocity_bins=self._num_velocity_bins,
                program=track.program,
                is_drum=track.is_drum)

        track_chunks = split_performance(
            track, chunk_size_steps, new_performance, clip_tied_notes=True)
        assert len(track_chunks) == self._max_num_chunks

        events_per_chunk = list(map(len, track_chunks))

        # Every chunk must leave room for the program and end tokens.
        if any(count > self._max_events_per_instrument - 2
               for count in events_per_chunk):
            return [], []

        # Non-drum chunks must carry a program in the MIDI program range.
        for track_chunk in track_chunks:
            if track_chunk.is_drum:
                continue
            if not (mm.MIN_MIDI_PROGRAM <= track_chunk.program
                    <= mm.MAX_MIDI_PROGRAM):
                return [], []

        total_events += sum(events_per_chunk)

        # Regroup so that each chunk position collects all of its tracks.
        for chunk_index, track_chunk in enumerate(track_chunks):
            tracks_by_chunk[chunk_index].append(track_chunk)

    # Too few events overall: reject.
    if total_events < self._min_total_events:
        return [], []

    num_programs = mm.MAX_MIDI_PROGRAM - mm.MIN_MIDI_PROGRAM + 1

    chunk_tensors = []
    chunk_chord_tensors = []

    for chunk_tracks in tracks_by_chunk:
        track_tensors = []

        for chunk_track in chunk_tracks:
            # Program token first; drums use the slot after the last program.
            if chunk_track.is_drum:
                program_offset = num_programs
            else:
                program_offset = chunk_track.program
            tokens = [self._performance_encoding.num_classes + program_offset]
            # Then the encoded performance events, then the end token.
            tokens.extend(
                self._performance_encoding.encode_event(event)
                for event in chunk_track)
            tokens.append(self.end_token)

            track_tensors.append(
                data.np_onehot(tokens, self.output_depth, self.output_dtype))

        if self._chord_encoding:
            # Chords are extracted per track because event timing, and hence
            # the aligned chord sequence, differs from track to track.
            try:
                track_chords = chords_lib.event_list_chords(
                    quantized_subsequence, chunk_tracks)
            except chords_lib.CoincidentChordsException:
                return [], []

            track_chord_tensors = []
            try:
                # One failing track discards the whole subsequence, so the
                # encoding of all tracks sits inside a single try block.
                for chords in track_chords:
                    # Pad token lines up with the track's program token.
                    chord_tokens = [self._control_pad_token]
                    chord_tokens.extend(
                        self._chord_encoding.encode_event(chord)
                        for chord in chords)
                    # Repeat the last token to line up with the end token.
                    chord_tokens.append(chord_tokens[-1])

                    track_chord_tensors.append(data.np_onehot(
                        chord_tokens, self.control_depth, self.control_dtype))
            except (mm.ChordSymbolException, mm.ChordEncodingException):
                return [], []

            chunk_chord_tensors.append(track_chord_tensors)

        chunk_tensors.append(track_tensors)

    return chunk_tensors, chunk_chord_tensors
def _generate(self, input_sequence, generator_options):
    """Extend a primer sequence with model-generated performance events.

    Args:
      input_sequence: The sequence used as primer. When an input section is
        supplied, only that time range of the sequence is used.
      generator_options: Options object supplying `input_sections` (at most
        one), `generate_sections` (exactly one) and an `args` map that may
        contain `note_density`, `pitch_histogram`, `disable_conditioning`,
        `temperature`, `beam_size`, `branch_factor` and
        `steps_per_iteration`.

    Returns:
      The sequence produced by calling `to_sequence` on the extended
      performance.

    Raises:
      mm.SequenceGeneratorException: If more than one input section or a
        number of generate sections other than one is given, or if the last
        primer note ends after the generate section starts.
    """
    input_sections = generator_options.input_sections
    generate_sections = generator_options.generate_sections

    if len(input_sections) > 1:
        raise mm.SequenceGeneratorException(
            'This model supports at most one input_sections message, but got %s'
            % len(input_sections))
    if len(generate_sections) != 1:
        raise mm.SequenceGeneratorException(
            'This model supports only 1 generate_sections message, but got %s'
            % len(generate_sections))

    generate_section = generate_sections[0]

    # Use the whole input as primer unless an input section narrows it.
    primer_sequence = input_sequence
    input_start_step = 0
    if input_sections:
        input_section = input_sections[0]
        primer_sequence = mm.trim_note_sequence(
            input_sequence, input_section.start_time, input_section.end_time)
        input_start_step = mm.quantize_to_step(
            input_section.start_time, self.steps_per_second,
            quantize_cutoff=0.0)

    if primer_sequence.notes:
        last_end_time = max(note.end_time for note in primer_sequence.notes)
    else:
        last_end_time = 0

    if last_end_time > generate_section.start_time:
        raise mm.SequenceGeneratorException(
            'Got GenerateSection request for section that is before or equal to '
            'the end of the NoteSequence. This model can only extend sequences. '
            'Requested start time: %s, Final note end time: %s' %
            (generate_section.start_time, last_end_time))

    # Quantize the primer and extract at most one performance from it.
    quantized_primer_sequence = mm.quantize_note_sequence_absolute(
        primer_sequence, self.steps_per_second)
    extracted_perfs, _ = mm.extract_performances(
        quantized_primer_sequence,
        start_step=input_start_step,
        num_velocity_bins=self.num_velocity_bins)
    assert len(extracted_perfs) <= 1

    generate_start_step = mm.quantize_to_step(
        generate_section.start_time, self.steps_per_second,
        quantize_cutoff=0.0)
    # quantize_cutoff=1.0 makes the end step always round down, so a request
    # ending at 4.99 seconds does not yield a sequence ending at 5.0 seconds.
    generate_end_step = mm.quantize_to_step(
        generate_section.end_time, self.steps_per_second, quantize_cutoff=1.0)

    performance = extracted_perfs[0] if extracted_perfs else None
    if not performance:
        # Nothing usable was extracted: start from an empty track located at
        # the requested generation start step.
        performance = mm.Performance(
            steps_per_second=(
                quantized_primer_sequence.quantization_info.steps_per_second),
            start_step=generate_start_step,
            num_velocity_bins=self.num_velocity_bins)

    # Make the track reach the step at which generation begins.
    performance.set_length(generate_start_step - performance.start_step)

    # Readers that pull a typed value out of a generator-option argument.
    def literal_arg(arg):
        return ast.literal_eval(arg.string_value)

    def float_arg(arg):
        return arg.float_value

    def int_arg(arg):
        return arg.int_value

    arg_parsers = {
        'note_density': literal_arg,
        'pitch_histogram': literal_arg,
        'disable_conditioning': literal_arg,
        'temperature': float_arg,
        'beam_size': int_arg,
        'branch_factor': int_arg,
        'steps_per_iteration': int_arg,
    }

    # Membership is tested before subscripting generator_options.args.
    args = {}
    for name, parse in arg_parsers.items():
        if name in generator_options.args:
            args[name] = parse(generator_options.args[name])

    # Note density: required when conditioning on it, dropped otherwise.
    if self.note_density_conditioning:
        if 'note_density' not in args:
            tf.logging.warning(
                'Conditioning on note density but none requested, using default.'
            )
            args['note_density'] = [DEFAULT_NOTE_DENSITY]
    elif 'note_density' in args:
        tf.logging.warning(
            'Not conditioning on note density, ignoring requested density.')
        del args['note_density']

    # Pitch class histogram: required when conditioning on it, dropped
    # otherwise.
    if self.pitch_histogram_conditioning:
        if 'pitch_histogram' not in args:
            tf.logging.warning(
                'Conditioning on pitch histogram but none requested, using default.'
            )
            args['pitch_histogram'] = [DEFAULT_PITCH_HISTOGRAM]
    elif 'pitch_histogram' in args:
        tf.logging.warning(
            'Not conditioning on pitch histogram, ignoring requested histogram.'
        )
        del args['pitch_histogram']

    # Disable-conditioning flag: present only when conditioning is optional.
    if self.optional_conditioning:
        if 'disable_conditioning' not in args:
            args['disable_conditioning'] = [False]
    elif 'disable_conditioning' in args:
        tf.logging.warning(
            'No optional conditioning, ignoring disable conditioning flag.')
        del args['disable_conditioning']

    # Wrap single values in a list so later code handles lists only.
    if self.note_density_conditioning:
        if not isinstance(args['note_density'], list):
            args['note_density'] = [args['note_density']]
    if self.pitch_histogram_conditioning:
        if not isinstance(args['pitch_histogram'][0], list):
            args['pitch_histogram'] = [args['pitch_histogram']]
    if self.optional_conditioning:
        if not isinstance(args['disable_conditioning'], list):
            args['disable_conditioning'] = [args['disable_conditioning']]

    # Normalise every pitch class histogram so that it sums to one.
    if self.pitch_histogram_conditioning:
        histograms = args['pitch_histogram']
        for index, histogram in enumerate(histograms):
            total = sum(histogram)
            if total > 0:
                histograms[index] = [
                    float(count) / total for count in histogram
                ]
            else:
                tf.logging.warning('Pitch histogram is empty, using default.')
                histograms[index] = DEFAULT_PITCH_HISTOGRAM

    total_steps = performance.num_steps + (
        generate_end_step - generate_start_step)

    # Replace the raw conditioning values with functions of the generation
    # step.
    mean_note_density = DEFAULT_NOTE_DENSITY
    if self.note_density_conditioning:
        note_densities = args.pop('note_density')
        args['note_density_fn'] = partial(
            _step_to_note_density,
            num_steps=total_steps,
            note_densities=note_densities)
        mean_note_density = sum(note_densities) / len(note_densities)
    if self.pitch_histogram_conditioning:
        pitch_histograms = args.pop('pitch_histogram')
        args['pitch_histogram_fn'] = partial(
            _step_to_pitch_histogram,
            num_steps=total_steps,
            pitch_histograms=pitch_histograms)
    if self.optional_conditioning:
        disable_flags = args.pop('disable_conditioning')
        args['disable_conditioning_fn'] = partial(
            _step_to_disable_conditioning,
            num_steps=total_steps,
            disable_conditioning_flags=disable_flags)

    if not performance:
        # Empty primer: begin with silence.
        performance.set_length(min(performance.max_shift_steps, total_steps))

    while performance.num_steps < total_steps:
        # Estimate the RNN steps from the mean (or default) note density,
        # assuming 4 RNN steps per note. The true count is only known after
        # generation because notes per quantized step vary.
        note_density = max(1.0, mean_note_density)
        steps_to_gen = total_steps - performance.num_steps
        rnn_steps_to_gen = int(math.ceil(
            4.0 * note_density * steps_to_gen / self.steps_per_second))
        tf.logging.info(
            'Need to generate %d more steps for this sequence, will try asking '
            'for %d RNN steps' % (steps_to_gen, rnn_steps_to_gen))
        performance = self._model.generate_performance(
            len(performance) + rnn_steps_to_gen, performance, **args)

        if not self.fill_generate_section:
            # Single pass for speed; the generate section may stay partly
            # unfilled.
            break

    performance.set_length(total_steps)

    generated_sequence = performance.to_sequence(
        max_note_duration=self.max_note_duration)

    assert (generated_sequence.total_time - generate_section.end_time) <= 1e-5
    return generated_sequence
def _generate(self, input_sequence, generator_options):
    """Extend a primer sequence with model-generated performance events.

    Args:
      input_sequence: The sequence used as primer. When an input section is
        supplied, only that time range of the sequence is used.
      generator_options: Options object supplying `input_sections` (at most
        one), `generate_sections` (exactly one) and an `args` map that may
        contain `disable_conditioning`, `temperature`, `beam_size`,
        `branch_factor`, `steps_per_iteration` and one entry per control
        signal name in `self.control_signals`.

    Returns:
      The sequence produced by calling `to_sequence` on the extended
      performance.

    Raises:
      mm.SequenceGeneratorError: If more than one input section or a number
        of generate sections other than one is given, or if the last primer
        note ends after the generate section starts.
    """
    input_sections = generator_options.input_sections
    generate_sections = generator_options.generate_sections

    if len(input_sections) > 1:
        raise mm.SequenceGeneratorError(
            'This model supports at most one input_sections message, but got %s'
            % len(input_sections))
    if len(generate_sections) != 1:
        raise mm.SequenceGeneratorError(
            'This model supports only 1 generate_sections message, but got %s'
            % len(generate_sections))

    generate_section = generate_sections[0]

    # Use the whole input as primer unless an input section narrows it.
    primer_sequence = input_sequence
    input_start_step = 0
    if input_sections:
        input_section = input_sections[0]
        primer_sequence = mm.trim_note_sequence(
            input_sequence, input_section.start_time, input_section.end_time)
        input_start_step = mm.quantize_to_step(
            input_section.start_time, self.steps_per_second,
            quantize_cutoff=0.0)

    last_end_time = (
        max(note.end_time for note in primer_sequence.notes)
        if primer_sequence.notes else 0)

    if last_end_time > generate_section.start_time:
        raise mm.SequenceGeneratorError(
            'Got GenerateSection request for section that is before or equal to '
            'the end of the NoteSequence. This model can only extend sequences. '
            'Requested start time: %s, Final note end time: %s' %
            (generate_section.start_time, last_end_time))

    # Quantize the primer and extract at most one performance from it.
    quantized_primer_sequence = mm.quantize_note_sequence_absolute(
        primer_sequence, self.steps_per_second)
    extracted_perfs, _ = mm.extract_performances(
        quantized_primer_sequence,
        start_step=input_start_step,
        num_velocity_bins=self.num_velocity_bins,
        note_performance=self._note_performance)
    assert len(extracted_perfs) <= 1

    generate_start_step = mm.quantize_to_step(
        generate_section.start_time, self.steps_per_second,
        quantize_cutoff=0.0)
    # quantize_cutoff=1.0 makes the end step always round down, so a request
    # ending at 4.99 seconds does not yield a sequence ending at 5.0 seconds.
    generate_end_step = mm.quantize_to_step(
        generate_section.end_time, self.steps_per_second, quantize_cutoff=1.0)

    performance = extracted_perfs[0] if extracted_perfs else None
    if not performance:
        # Nothing usable was extracted: start from an empty track located at
        # the requested generation start step.
        performance = mm.Performance(
            steps_per_second=(
                quantized_primer_sequence.quantization_info.steps_per_second),
            start_step=generate_start_step,
            num_velocity_bins=self.num_velocity_bins)

    # Make the track reach the step at which generation begins.
    performance.set_length(generate_start_step - performance.start_step)

    # Readers that pull a typed value out of a generator-option argument.
    def literal_arg(arg):
        return ast.literal_eval(arg.string_value)

    def float_arg(arg):
        return arg.float_value

    def int_arg(arg):
        return arg.int_value

    arg_parsers = {
        'disable_conditioning': literal_arg,
        'temperature': float_arg,
        'beam_size': int_arg,
        'branch_factor': int_arg,
        'steps_per_iteration': int_arg,
    }

    # Every control signal is read as a Python literal from its string value.
    controls = self.control_signals or []
    for control in controls:
        arg_parsers[control.name] = literal_arg

    # Membership is tested before subscripting generator_options.args.
    args = {}
    for name, parse in arg_parsers.items():
        if name in generator_options.args:
            args[name] = parse(generator_options.args[name])

    # Each control signal ends up as a list: the default, a wrapped single
    # value, or a list whose entries all validate.
    for control in controls:
        control_name = control.name
        if control_name not in args:
            tf.logging.warning(
                'Control value not specified, using default: %s = %s',
                control_name, control.default_value)
            args[control_name] = [control.default_value]
            continue

        requested = args[control_name]
        if control.validate(requested):
            args[control_name] = [requested]
            continue

        is_valid_list = isinstance(requested, list) and all(
            control.validate(value) for value in requested)
        if not is_valid_list:
            tf.logging.fatal(
                'Invalid control value: %s = %s', control_name, requested)

    # With optional conditioning the disable flag must be a list of bools.
    if self.optional_conditioning:
        if 'disable_conditioning' not in args:
            args['disable_conditioning'] = [False]
        else:
            disable_value = args['disable_conditioning']
            if isinstance(disable_value, bool):
                args['disable_conditioning'] = [disable_value]
            else:
                is_bool_list = isinstance(disable_value, list) and all(
                    isinstance(value, bool) for value in disable_value)
                if not is_bool_list:
                    tf.logging.fatal(
                        'Invalid disable_conditioning value: %s',
                        disable_value)

    total_steps = performance.num_steps + (
        generate_end_step - generate_start_step)

    # The mean requested density (if any) drives the RNN step estimate below.
    mean_note_density = DEFAULT_NOTE_DENSITY
    if 'notes_per_second' in args:
        mean_note_density = (
            sum(args['notes_per_second']) / len(args['notes_per_second']))

    # Replace the raw control values and the disable flag with functions of
    # the generation step.
    if self.control_signals:
        args['control_signal_fns'] = [
            functools.partial(
                _step_to_value,
                num_steps=total_steps,
                values=args.pop(control.name))
            for control in self.control_signals
        ]
    if self.optional_conditioning:
        disable_flags = args.pop('disable_conditioning')
        args['disable_conditioning_fn'] = functools.partial(
            _step_to_value, num_steps=total_steps, values=disable_flags)

    if not performance:
        # Empty primer: begin with silence.
        performance.set_length(min(performance.max_shift_steps, total_steps))

    while performance.num_steps < total_steps:
        # Estimate the RNN steps from the mean (or default) note density,
        # assuming 4 RNN steps per note. The true count is only known after
        # generation because notes per quantized step vary.
        note_density = max(1.0, mean_note_density)
        steps_to_gen = total_steps - performance.num_steps
        rnn_steps_to_gen = int(math.ceil(
            4.0 * note_density * steps_to_gen / self.steps_per_second))
        tf.logging.info(
            'Need to generate %d more steps for this sequence, will try asking '
            'for %d RNN steps' % (steps_to_gen, rnn_steps_to_gen))
        performance = self._model.generate_performance(
            len(performance) + rnn_steps_to_gen, performance, **args)

        if not self.fill_generate_section:
            # Single pass for speed; the generate section may stay partly
            # unfilled.
            break

    performance.set_length(total_steps)

    generated_sequence = performance.to_sequence(
        max_note_duration=self.max_note_duration)

    assert (generated_sequence.total_time - generate_section.end_time) <= 1e-5
    return generated_sequence
def _quantized_subsequence_to_tensors(self, quantized_subsequence):
    """Encode a quantized subsequence as per-chunk, per-track one-hot tensors.

    Each extracted instrument track is set to `self._max_steps_truncate`
    steps and split into chunks of
    `self._steps_per_bar * self._chunk_size_bars` steps. Every track chunk is
    encoded as a program token, followed by its performance events, followed
    by the end token. When `self._chord_encoding` is set, a parallel chord
    control sequence is encoded for every track chunk as well.

    Args:
      quantized_subsequence: The quantized sequence to convert. Its `notes`
        are inspected directly, and it is handed to
        `mm.extract_performances` and `chords_lib.event_list_chords`.

    Returns:
      A `(chunk_tensors, chunk_chord_tensors)` tuple. `chunk_tensors` holds
      one list per chunk containing one encoded array per track.
      `chunk_chord_tensors` holds the matching chord arrays and stays empty
      when chord encoding is disabled. `([], [])` is returned whenever the
      subsequence is rejected.
    """
    # A single note outside the supported pitch range rejects the sequence.
    for note in quantized_subsequence.notes:
        if note.pitch < self._min_pitch or note.pitch > self._max_pitch:
            return [], []

    # Pull out one performance per instrument.
    tracks, _ = mm.extract_performances(
        quantized_subsequence,
        max_steps_truncate=self._max_steps_truncate,
        num_velocity_bins=self._num_velocity_bins,
        split_instruments=True)

    # The instrument count has to fall inside the configured bounds.
    num_tracks = len(tracks)
    if (num_tracks < self._min_num_instruments or
            num_tracks > self._max_num_instruments):
        return [], []

    # Order by program number; drum tracks (is_drum=True) sort last.
    def program_order(performance):
        return (performance.is_drum, performance.program)

    tracks = sorted(tracks, key=program_order)

    chunk_size_steps = self._steps_per_bar * self._chunk_size_bars
    tracks_by_chunk = [[] for _ in range(self._max_num_chunks)]
    total_events = 0

    for track in tracks:
        # Bring the track to the expected number of time steps.
        track.set_length(self._max_steps_truncate)

        # Factory handed to split_performance; `track=track` binds the
        # current loop value at definition time.
        def new_performance(quantized_sequence, start_step, track=track):
            if quantized_sequence is None:
                steps_per_quarter = self._steps_per_quarter
            else:
                steps_per_quarter = None
            return performance_lib.MetricPerformance(
                quantized_sequence=quantized_sequence,
                steps_per_quarter=steps_per_quarter,
                start_step=start_step,
                num_velocity_bins=self._num_velocity_bins,
                program=track.program,
                is_drum=track.is_drum)

        track_chunks = split_performance(
            track, chunk_size_steps, new_performance, clip_tied_notes=True)
        assert len(track_chunks) == self._max_num_chunks

        events_per_chunk = list(map(len, track_chunks))

        # Every chunk must leave room for the program and end tokens.
        if any(count > self._max_events_per_instrument - 2
               for count in events_per_chunk):
            return [], []

        # Non-drum chunks must carry a program in the MIDI program range.
        for track_chunk in track_chunks:
            if track_chunk.is_drum:
                continue
            if not (mm.MIN_MIDI_PROGRAM <= track_chunk.program
                    <= mm.MAX_MIDI_PROGRAM):
                return [], []

        total_events += sum(events_per_chunk)

        # Regroup so that each chunk position collects all of its tracks.
        for chunk_index, track_chunk in enumerate(track_chunks):
            tracks_by_chunk[chunk_index].append(track_chunk)

    # Too few events overall: reject.
    if total_events < self._min_total_events:
        return [], []

    num_programs = mm.MAX_MIDI_PROGRAM - mm.MIN_MIDI_PROGRAM + 1

    chunk_tensors = []
    chunk_chord_tensors = []

    for chunk_tracks in tracks_by_chunk:
        track_tensors = []

        for chunk_track in chunk_tracks:
            # Program token first; drums use the slot after the last program.
            if chunk_track.is_drum:
                program_offset = num_programs
            else:
                program_offset = chunk_track.program
            tokens = [self._performance_encoding.num_classes + program_offset]
            # Then the encoded performance events, then the end token.
            tokens.extend(
                self._performance_encoding.encode_event(event)
                for event in chunk_track)
            tokens.append(self.end_token)

            track_tensors.append(
                data.np_onehot(tokens, self.output_depth, self.output_dtype))

        if self._chord_encoding:
            # Chords are extracted per track because event timing, and hence
            # the aligned chord sequence, differs from track to track.
            try:
                track_chords = chords_lib.event_list_chords(
                    quantized_subsequence, chunk_tracks)
            except chords_lib.CoincidentChordsError:
                return [], []

            track_chord_tensors = []
            try:
                # One failing track discards the whole subsequence, so the
                # encoding of all tracks sits inside a single try block.
                for chords in track_chords:
                    # Pad token lines up with the track's program token.
                    chord_tokens = [self._control_pad_token]
                    chord_tokens.extend(
                        self._chord_encoding.encode_event(chord)
                        for chord in chords)
                    # Repeat the last token to line up with the end token.
                    chord_tokens.append(chord_tokens[-1])

                    track_chord_tensors.append(data.np_onehot(
                        chord_tokens, self.control_depth, self.control_dtype))
            except (mm.ChordSymbolError, mm.ChordEncodingError):
                return [], []

            chunk_chord_tensors.append(track_chord_tensors)

        chunk_tensors.append(track_tensors)

    return chunk_tensors, chunk_chord_tensors