def _to_tensors(self, note_sequence):
  """Builds de-duplicated, one-hot encoded tensors from a NoteSequence.

  The sequence is quantized (relative to quarter notes when
  `_steps_per_quarter` is set, otherwise in absolute time), event lists are
  extracted, optionally padded and sliced, duplicates are removed, and every
  remaining event tuple is one-hot encoded.

  Args:
    note_sequence: The NoteSequence to convert.

  Returns:
    A pair `(seqs, seqs)` containing the same list of one-hot sequences
    twice, or `([], [])` when quantization raises one of the handled
    exceptions or the quantized steps per bar differ from `_steps_per_bar`.
  """
  try:
    if self._steps_per_quarter:
      quantized = mm.quantize_note_sequence(
          note_sequence, self._steps_per_quarter)
      bar_steps = mm.steps_per_bar_in_quantized_sequence(quantized)
      if bar_steps != self._steps_per_bar:
        return [], []
    else:
      quantized = mm.quantize_note_sequence_absolute(
          note_sequence, self._steps_per_second)
  except (mm.BadTimeSignatureException,
          mm.NonIntegerStepsPerBarException,
          mm.NegativeTimeException):
    return [], []

  extracted, _ = self._event_extractor_fn(quantized)

  if self._pad_to_total_time:
    for events in extracted:
      # First grow from the left by `start_step`, then set the length to the
      # total number of quantized steps.
      events.set_length(len(events) + events.start_step, from_left=True)
      events.set_length(quantized.total_quantized_steps)

  if self._slice_steps:
    # Windows of `_slice_steps` events whose end index advances by
    # `_steps_per_bar`.
    candidates = [
        tuple(events[stop - self._slice_steps:stop])
        for events in extracted
        for stop in range(
            self._slice_steps, len(events) + 1, self._steps_per_bar)
    ]
  else:
    candidates = [tuple(events) for events in extracted]

  # TODO(adarob): Consider handling the fact that different event lists can
  # be mapped to identical tensors by the encoder_decoder (e.g., Drums).
  unique = self._maybe_sample_outputs(list(set(candidates)))

  seqs = [
      np_onehot(
          [self._legacy_encoder_decoder.encode_event(event)
           for event in events] +
          ([self.end_token] if self.end_token is not None else []),
          self.output_depth,
          self.output_dtype)
      for events in unique
  ]
  return seqs, seqs
def _to_tensors(self, note_sequence):
  """Turns a NoteSequence into unique one-hot tensor sequences.

  Args:
    note_sequence: The NoteSequence to quantize and encode.

  Returns:
    `(seqs, seqs)`, the same list of one-hot encoded sequences given twice.
    `([], [])` is returned when quantization raises a handled exception or
    when the quantized sequence's steps per bar do not equal
    `_steps_per_bar`.
  """
  try:
    if not self._steps_per_quarter:
      quantized = mm.quantize_note_sequence_absolute(
          note_sequence, self._steps_per_second)
    else:
      quantized = mm.quantize_note_sequence(
          note_sequence, self._steps_per_quarter)
      if (mm.steps_per_bar_in_quantized_sequence(quantized) !=
          self._steps_per_bar):
        return [], []
  except (mm.BadTimeSignatureException,
          mm.NonIntegerStepsPerBarException,
          mm.NegativeTimeException):
    return [], []

  tracks, _ = self._event_extractor_fn(quantized)

  if self._pad_to_total_time:
    total_steps_getter = quantized
    for track in tracks:
      track.set_length(len(track) + track.start_step, from_left=True)
      track.set_length(total_steps_getter.total_quantized_steps)

  if self._slice_steps:
    sliced = []
    for track in tracks:
      # Each window holds `_slice_steps` events; consecutive windows start
      # `_steps_per_bar` events apart.
      sliced.extend(
          tuple(track[end - self._slice_steps:end])
          for end in range(
              self._slice_steps, len(track) + 1, self._steps_per_bar))
  else:
    sliced = list(map(tuple, tracks))

  # TODO(adarob): Consider handling the fact that different event lists can
  # be mapped to identical tensors by the encoder_decoder (e.g., Drums).
  deduped = list(set(sliced))
  sampled = self._maybe_sample_outputs(deduped)

  seqs = []
  for events in sampled:
    labels = [
        self._legacy_encoder_decoder.encode_event(event) for event in events
    ]
    if self.end_token is not None:
      labels = labels + [self.end_token]
    seqs.append(np_onehot(labels, self.output_depth, self.output_dtype))
  return seqs, seqs
def _generate(self, input_sequence, generator_options):
  """Extends the primer performance to cover the requested generate section.

  Args:
    input_sequence: The NoteSequence used as the primer.
    generator_options: Options whose `input_sections`, `generate_sections`
      and `args` fields are read here.

  Returns:
    The sequence returned by `performance.to_sequence(...)` after generation.

  Raises:
    mm.SequenceGeneratorException: If more than one input section is given,
      if the number of generate sections is not exactly one, or if the last
      primer note ends after the generate section starts.
  """
  input_sections = generator_options.input_sections
  if len(input_sections) > 1:
    raise mm.SequenceGeneratorException(
        'This model supports at most one input_sections message, but got %s' %
        len(input_sections))
  generate_sections = generator_options.generate_sections
  if len(generate_sections) != 1:
    raise mm.SequenceGeneratorException(
        'This model supports only 1 generate_sections message, but got %s' %
        len(generate_sections))
  target = generate_sections[0]

  if input_sections:
    primer_section = input_sections[0]
    primer_sequence = mm.trim_note_sequence(
        input_sequence, primer_section.start_time, primer_section.end_time)
    input_start_step = mm.quantize_to_step(
        primer_section.start_time, self.steps_per_second, quantize_cutoff=0.0)
  else:
    primer_sequence = input_sequence
    input_start_step = 0

  if primer_sequence.notes:
    last_end_time = max(note.end_time for note in primer_sequence.notes)
  else:
    last_end_time = 0
  if last_end_time > target.start_time:
    raise mm.SequenceGeneratorException(
        'Got GenerateSection request for section that is before or equal to '
        'the end of the NoteSequence. This model can only extend sequences. '
        'Requested start time: %s, Final note end time: %s' %
        (target.start_time, last_end_time))

  # Quantize the priming sequence.
  quantized_primer = mm.quantize_note_sequence_absolute(
      primer_sequence, self.steps_per_second)
  extracted_perfs, _ = performance_lib.extract_performances(
      quantized_primer,
      start_step=input_start_step,
      num_velocity_bins=self.num_velocity_bins)
  assert len(extracted_perfs) <= 1

  start_step = mm.quantize_to_step(
      target.start_time, self.steps_per_second, quantize_cutoff=0.0)
  # The end step uses quantize_cutoff=1.0 so it always rounds down. This
  # avoids generating a sequence that ends at 5.0 seconds when the requested
  # end time is 4.99.
  end_step = mm.quantize_to_step(
      target.end_time, self.steps_per_second, quantize_cutoff=1.0)

  if not (extracted_perfs and extracted_perfs[0]):
    # Nothing could be extracted: begin with an empty track that starts at
    # the requested generation start step.
    performance = performance_lib.Performance(
        steps_per_second=quantized_primer.quantization_info.steps_per_second,
        start_step=start_step,
        num_velocity_bins=self.num_velocity_bins)
  else:
    performance = extracted_perfs[0]

  # Make the track reach the step at which generation should begin.
  performance.set_length(start_step - performance.start_step)

  # Pull the supported generation arguments out of the generator options.
  arg_types = {
      'temperature': lambda arg: arg.float_value,
      'beam_size': lambda arg: arg.int_value,
      'branch_factor': lambda arg: arg.int_value,
      'steps_per_iteration': lambda arg: arg.int_value
  }
  requested = generator_options.args
  args = {
      name: parse(requested[name])
      for name, parse in arg_types.items()
      if name in requested
  }

  total_steps = performance.num_steps + (end_step - start_step)

  if not performance:
    # The primer is empty, so start from silence.
    performance.set_length(min(performance_lib.MAX_SHIFT_STEPS, total_steps))

  while performance.num_steps < total_steps:
    # Estimate roughly 10 notes per second and 4 RNN steps per note. The
    # real count is only known after generation because the number of notes
    # per quantized step varies.
    remaining = total_steps - performance.num_steps
    rnn_steps = 40 * int(math.ceil(
        float(remaining) / performance_lib.DEFAULT_STEPS_PER_SECOND))
    tf.logging.info(
        'Need to generate %d more steps for this sequence, will try asking '
        'for %d RNN steps' % (remaining, rnn_steps))
    performance = self._model.generate_performance(
        len(performance) + rnn_steps, performance, **args)

    if not self.fill_generate_section:
      # For speed, run the loop only once even though this may leave the
      # generate section partially filled.
      break

  performance.set_length(total_steps)

  generated_sequence = performance.to_sequence(
      max_note_duration=self.max_note_duration)

  assert (generated_sequence.total_time - target.end_time) <= 1e-5
  return generated_sequence
def _generate(self, input_sequence, generator_options):
  """Extends the primer performance, optionally using control signals.

  Args:
    input_sequence: The NoteSequence used as the primer.
    generator_options: Options whose `input_sections`, `generate_sections`
      and `args` fields are read here.

  Returns:
    The sequence returned by `performance.to_sequence(...)` after generation.

  Raises:
    sequence_generator.SequenceGeneratorError: If more than one input section
      is given, if the number of generate sections is not exactly one, or if
      the last primer note ends after the generate section starts.
  """
  input_sections = generator_options.input_sections
  if len(input_sections) > 1:
    raise sequence_generator.SequenceGeneratorError(
        'This model supports at most one input_sections message, but got %s' %
        len(input_sections))
  generate_sections = generator_options.generate_sections
  if len(generate_sections) != 1:
    raise sequence_generator.SequenceGeneratorError(
        'This model supports only 1 generate_sections message, but got %s' %
        len(generate_sections))
  target = generate_sections[0]

  if input_sections:
    primer_section = input_sections[0]
    primer_sequence = mm.trim_note_sequence(
        input_sequence, primer_section.start_time, primer_section.end_time)
    input_start_step = mm.quantize_to_step(
        primer_section.start_time, self.steps_per_second, quantize_cutoff=0.0)
  else:
    primer_sequence = input_sequence
    input_start_step = 0

  last_end_time = (
      max(note.end_time for note in primer_sequence.notes)
      if primer_sequence.notes else 0)
  if last_end_time > target.start_time:
    raise sequence_generator.SequenceGeneratorError(
        'Got GenerateSection request for section that is before or equal to '
        'the end of the NoteSequence. This model can only extend sequences. '
        'Requested start time: %s, Final note end time: %s' %
        (target.start_time, last_end_time))

  # Quantize the priming sequence.
  quantized_primer = mm.quantize_note_sequence_absolute(
      primer_sequence, self.steps_per_second)
  extracted_perfs, _ = performance_pipeline.extract_performances(
      quantized_primer,
      start_step=input_start_step,
      num_velocity_bins=self.num_velocity_bins,
      note_performance=self._note_performance)
  assert len(extracted_perfs) <= 1

  start_step = mm.quantize_to_step(
      target.start_time, self.steps_per_second, quantize_cutoff=0.0)
  # The end step uses quantize_cutoff=1.0 so it always rounds down. This
  # avoids generating a sequence that ends at 5.0 seconds when the requested
  # end time is 4.99.
  end_step = mm.quantize_to_step(
      target.end_time, self.steps_per_second, quantize_cutoff=1.0)

  if not (extracted_perfs and extracted_perfs[0]):
    # Nothing could be extracted: begin with an empty track that starts at
    # the requested generation start step.
    performance = mm.Performance(
        steps_per_second=quantized_primer.quantization_info.steps_per_second,
        start_step=start_step,
        num_velocity_bins=self.num_velocity_bins)
  else:
    performance = extracted_perfs[0]

  # Make the track reach the step at which generation should begin.
  performance.set_length(start_step - performance.start_step)

  def parse_literal(arg):
    """Evaluates the argument's string value as a Python literal."""
    return ast.literal_eval(arg.string_value)

  # Pull the supported generation arguments out of the generator options.
  arg_types = {
      'disable_conditioning': parse_literal,
      'temperature': lambda arg: arg.float_value,
      'beam_size': lambda arg: arg.int_value,
      'branch_factor': lambda arg: arg.int_value,
      'steps_per_iteration': lambda arg: arg.int_value
  }
  for control in self.control_signals or ():
    arg_types[control.name] = parse_literal

  requested = generator_options.args
  args = {
      name: parse(requested[name])
      for name, parse in arg_types.items()
      if name in requested
  }

  # Every control signal needs a value; single values are wrapped in a list.
  for control in self.control_signals or ():
    name = control.name
    if name not in args:
      tf.logging.warning(
          'Control value not specified, using default: %s = %s',
          name, control.default_value)
      args[name] = [control.default_value]
    elif control.validate(args[name]):
      args[name] = [args[name]]
    elif not (isinstance(args[name], list) and
              all(control.validate(value) for value in args[name])):
      tf.logging.fatal('Invalid control value: %s = %s', name, args[name])

  # With optional conditioning the disable flag must be present, and a single
  # flag is wrapped in a list.
  if self.optional_conditioning:
    if 'disable_conditioning' not in args:
      args['disable_conditioning'] = [False]
    else:
      flags = args['disable_conditioning']
      if isinstance(flags, bool):
        args['disable_conditioning'] = [flags]
      elif not (isinstance(flags, list) and
                all(isinstance(flag, bool) for flag in flags)):
        tf.logging.fatal('Invalid disable_conditioning value: %s', flags)

  total_steps = performance.num_steps + (end_step - start_step)

  if 'notes_per_second' in args:
    densities = args['notes_per_second']
    mean_note_density = sum(densities) / len(densities)
  else:
    mean_note_density = DEFAULT_NOTE_DENSITY

  # Replace the raw control values and disable flags in `args` with
  # functions that map a generation step to the value for that step.
  if self.control_signals:
    args['control_signal_fns'] = [
        functools.partial(
            _step_to_value,
            num_steps=total_steps,
            values=args.pop(control.name))
        for control in self.control_signals
    ]
  if self.optional_conditioning:
    args['disable_conditioning_fn'] = functools.partial(
        _step_to_value,
        num_steps=total_steps,
        values=args.pop('disable_conditioning'))

  if not performance:
    # The primer is empty, so start from silence.
    performance.set_length(min(performance.max_shift_steps, total_steps))

  while performance.num_steps < total_steps:
    # Estimate using the mean requested (or default) note density and 4 RNN
    # steps per note. The real count is only known after generation because
    # the number of notes per quantized step varies.
    note_density = max(1.0, mean_note_density)
    remaining = total_steps - performance.num_steps
    rnn_steps = int(math.ceil(
        4.0 * note_density * remaining / self.steps_per_second))
    tf.logging.info(
        'Need to generate %d more steps for this sequence, will try asking '
        'for %d RNN steps' % (remaining, rnn_steps))
    performance = self._model.generate_performance(
        len(performance) + rnn_steps, performance, **args)

    if not self.fill_generate_section:
      # For speed, run the loop only once even though this may leave the
      # generate section partially filled.
      break

  performance.set_length(total_steps)

  generated_sequence = performance.to_sequence(
      max_note_duration=self.max_note_duration)

  assert (generated_sequence.total_time - target.end_time) <= 1e-5
  return generated_sequence
def _generate(self, input_sequence, generator_options):
  """Extends the primer performance, optionally using control signals.

  Args:
    input_sequence: The NoteSequence used as the primer.
    generator_options: Options whose `input_sections`, `generate_sections`
      and `args` fields are read here.

  Returns:
    The sequence returned by `performance.to_sequence(...)` after generation.

  Raises:
    mm.SequenceGeneratorError: If more than one input section is given, if
      the number of generate sections is not exactly one, or if the last
      primer note ends after the generate section starts.
  """
  input_sections = generator_options.input_sections
  if len(input_sections) > 1:
    raise mm.SequenceGeneratorError(
        'This model supports at most one input_sections message, but got %s' %
        len(input_sections))
  generate_sections = generator_options.generate_sections
  if len(generate_sections) != 1:
    raise mm.SequenceGeneratorError(
        'This model supports only 1 generate_sections message, but got %s' %
        len(generate_sections))
  target = generate_sections[0]

  if input_sections:
    primer_section = input_sections[0]
    primer_sequence = mm.trim_note_sequence(
        input_sequence, primer_section.start_time, primer_section.end_time)
    input_start_step = mm.quantize_to_step(
        primer_section.start_time, self.steps_per_second, quantize_cutoff=0.0)
  else:
    primer_sequence = input_sequence
    input_start_step = 0

  last_end_time = (
      max(note.end_time for note in primer_sequence.notes)
      if primer_sequence.notes else 0)
  if last_end_time > target.start_time:
    raise mm.SequenceGeneratorError(
        'Got GenerateSection request for section that is before or equal to '
        'the end of the NoteSequence. This model can only extend sequences. '
        'Requested start time: %s, Final note end time: %s' %
        (target.start_time, last_end_time))

  # Quantize the priming sequence.
  quantized_primer = mm.quantize_note_sequence_absolute(
      primer_sequence, self.steps_per_second)
  extracted_perfs, _ = mm.extract_performances(
      quantized_primer,
      start_step=input_start_step,
      num_velocity_bins=self.num_velocity_bins,
      note_performance=self._note_performance)
  assert len(extracted_perfs) <= 1

  start_step = mm.quantize_to_step(
      target.start_time, self.steps_per_second, quantize_cutoff=0.0)
  # The end step uses quantize_cutoff=1.0 so it always rounds down. This
  # avoids generating a sequence that ends at 5.0 seconds when the requested
  # end time is 4.99.
  end_step = mm.quantize_to_step(
      target.end_time, self.steps_per_second, quantize_cutoff=1.0)

  if not (extracted_perfs and extracted_perfs[0]):
    # Nothing could be extracted: begin with an empty track that starts at
    # the requested generation start step.
    performance = mm.Performance(
        steps_per_second=quantized_primer.quantization_info.steps_per_second,
        start_step=start_step,
        num_velocity_bins=self.num_velocity_bins)
  else:
    performance = extracted_perfs[0]

  # Make the track reach the step at which generation should begin.
  performance.set_length(start_step - performance.start_step)

  def parse_literal(arg):
    """Evaluates the argument's string value as a Python literal."""
    return ast.literal_eval(arg.string_value)

  # Pull the supported generation arguments out of the generator options.
  arg_types = {
      'disable_conditioning': parse_literal,
      'temperature': lambda arg: arg.float_value,
      'beam_size': lambda arg: arg.int_value,
      'branch_factor': lambda arg: arg.int_value,
      'steps_per_iteration': lambda arg: arg.int_value
  }
  for control in self.control_signals or ():
    arg_types[control.name] = parse_literal

  requested = generator_options.args
  args = {
      name: parse(requested[name])
      for name, parse in arg_types.items()
      if name in requested
  }

  # Every control signal needs a value; single values are wrapped in a list.
  for control in self.control_signals or ():
    name = control.name
    if name not in args:
      tf.logging.warning(
          'Control value not specified, using default: %s = %s',
          name, control.default_value)
      args[name] = [control.default_value]
    elif control.validate(args[name]):
      args[name] = [args[name]]
    elif not (isinstance(args[name], list) and
              all(control.validate(value) for value in args[name])):
      tf.logging.fatal('Invalid control value: %s = %s', name, args[name])

  # With optional conditioning the disable flag must be present, and a single
  # flag is wrapped in a list.
  if self.optional_conditioning:
    if 'disable_conditioning' not in args:
      args['disable_conditioning'] = [False]
    else:
      flags = args['disable_conditioning']
      if isinstance(flags, bool):
        args['disable_conditioning'] = [flags]
      elif not (isinstance(flags, list) and
                all(isinstance(flag, bool) for flag in flags)):
        tf.logging.fatal('Invalid disable_conditioning value: %s', flags)

  total_steps = performance.num_steps + (end_step - start_step)

  if 'notes_per_second' in args:
    densities = args['notes_per_second']
    mean_note_density = sum(densities) / len(densities)
  else:
    mean_note_density = DEFAULT_NOTE_DENSITY

  # Replace the raw control values and disable flags in `args` with
  # functions that map a generation step to the value for that step.
  if self.control_signals:
    args['control_signal_fns'] = [
        functools.partial(
            _step_to_value,
            num_steps=total_steps,
            values=args.pop(control.name))
        for control in self.control_signals
    ]
  if self.optional_conditioning:
    args['disable_conditioning_fn'] = functools.partial(
        _step_to_value,
        num_steps=total_steps,
        values=args.pop('disable_conditioning'))

  if not performance:
    # The primer is empty, so start from silence.
    performance.set_length(min(performance.max_shift_steps, total_steps))

  while performance.num_steps < total_steps:
    # Estimate using the mean requested (or default) note density and 4 RNN
    # steps per note. The real count is only known after generation because
    # the number of notes per quantized step varies.
    note_density = max(1.0, mean_note_density)
    remaining = total_steps - performance.num_steps
    rnn_steps = int(math.ceil(
        4.0 * note_density * remaining / self.steps_per_second))
    tf.logging.info(
        'Need to generate %d more steps for this sequence, will try asking '
        'for %d RNN steps' % (remaining, rnn_steps))
    performance = self._model.generate_performance(
        len(performance) + rnn_steps, performance, **args)

    if not self.fill_generate_section:
      # For speed, run the loop only once even though this may leave the
      # generate section partially filled.
      break

  performance.set_length(total_steps)

  generated_sequence = performance.to_sequence(
      max_note_duration=self.max_note_duration)

  assert (generated_sequence.total_time - target.end_time) <= 1e-5
  return generated_sequence
def _generate(self, input_sequence, generator_options):
  """Extends the primer performance with optional density/pitch conditioning.

  Args:
    input_sequence: The NoteSequence used as the primer.
    generator_options: Options whose `input_sections`, `generate_sections`
      and `args` fields are read here.

  Returns:
    The sequence returned by `performance.to_sequence(...)` after generation.

  Raises:
    mm.SequenceGeneratorException: If more than one input section is given,
      if the number of generate sections is not exactly one, or if the last
      primer note ends after the generate section starts.
  """
  input_sections = generator_options.input_sections
  if len(input_sections) > 1:
    raise mm.SequenceGeneratorException(
        'This model supports at most one input_sections message, but got %s' %
        len(input_sections))
  generate_sections = generator_options.generate_sections
  if len(generate_sections) != 1:
    raise mm.SequenceGeneratorException(
        'This model supports only 1 generate_sections message, but got %s' %
        len(generate_sections))
  target = generate_sections[0]

  if input_sections:
    primer_section = input_sections[0]
    primer_sequence = mm.trim_note_sequence(
        input_sequence, primer_section.start_time, primer_section.end_time)
    input_start_step = mm.quantize_to_step(
        primer_section.start_time, self.steps_per_second, quantize_cutoff=0.0)
  else:
    primer_sequence = input_sequence
    input_start_step = 0

  if primer_sequence.notes:
    last_end_time = max(note.end_time for note in primer_sequence.notes)
  else:
    last_end_time = 0
  if last_end_time > target.start_time:
    raise mm.SequenceGeneratorException(
        'Got GenerateSection request for section that is before or equal to '
        'the end of the NoteSequence. This model can only extend sequences. '
        'Requested start time: %s, Final note end time: %s' %
        (target.start_time, last_end_time))

  # Quantize the priming sequence.
  quantized_primer = mm.quantize_note_sequence_absolute(
      primer_sequence, self.steps_per_second)
  extracted_perfs, _ = performance_lib.extract_performances(
      quantized_primer,
      start_step=input_start_step,
      num_velocity_bins=self.num_velocity_bins)
  assert len(extracted_perfs) <= 1

  start_step = mm.quantize_to_step(
      target.start_time, self.steps_per_second, quantize_cutoff=0.0)
  # The end step uses quantize_cutoff=1.0 so it always rounds down. This
  # avoids generating a sequence that ends at 5.0 seconds when the requested
  # end time is 4.99.
  end_step = mm.quantize_to_step(
      target.end_time, self.steps_per_second, quantize_cutoff=1.0)

  if not (extracted_perfs and extracted_perfs[0]):
    # Nothing could be extracted: begin with an empty track that starts at
    # the requested generation start step.
    performance = performance_lib.Performance(
        steps_per_second=quantized_primer.quantization_info.steps_per_second,
        start_step=start_step,
        num_velocity_bins=self.num_velocity_bins)
  else:
    performance = extracted_perfs[0]

  # Make the track reach the step at which generation should begin.
  performance.set_length(start_step - performance.start_step)

  def parse_literal(arg):
    """Evaluates the argument's string value as a Python literal."""
    return ast.literal_eval(arg.string_value)

  # Pull the supported generation arguments out of the generator options.
  arg_types = {
      'note_density': parse_literal,
      'pitch_histogram': parse_literal,
      'disable_conditioning': parse_literal,
      'temperature': lambda arg: arg.float_value,
      'beam_size': lambda arg: arg.int_value,
      'branch_factor': lambda arg: arg.int_value,
      'steps_per_iteration': lambda arg: arg.int_value
  }
  requested = generator_options.args
  args = {
      name: parse(requested[name])
      for name, parse in arg_types.items()
      if name in requested
  }

  # Note density must be present exactly when conditioning on it.
  if self.note_density_conditioning:
    if 'note_density' not in args:
      tf.logging.warning(
          'Conditioning on note density but none requested, using default.')
      args['note_density'] = [DEFAULT_NOTE_DENSITY]
  elif 'note_density' in args:
    tf.logging.warning(
        'Not conditioning on note density, ignoring requested density.')
    del args['note_density']

  # The pitch class histogram must be present exactly when conditioning on
  # it.
  if self.pitch_histogram_conditioning:
    if 'pitch_histogram' not in args:
      tf.logging.warning(
          'Conditioning on pitch histogram but none requested, using default.')
      args['pitch_histogram'] = [DEFAULT_PITCH_HISTOGRAM]
  elif 'pitch_histogram' in args:
    tf.logging.warning(
        'Not conditioning on pitch histogram, ignoring requested histogram.')
    del args['pitch_histogram']

  # The disable flag must be present exactly when conditioning is optional.
  if self.optional_conditioning:
    if 'disable_conditioning' not in args:
      args['disable_conditioning'] = [False]
  elif 'disable_conditioning' in args:
    tf.logging.warning(
        'No optional conditioning, ignoring disable conditioning flag.')
    del args['disable_conditioning']

  # Wrap a lone density, histogram or disable flag in a list so the code
  # below only has to deal with lists.
  if self.note_density_conditioning:
    if not isinstance(args['note_density'], list):
      args['note_density'] = [args['note_density']]
  if self.pitch_histogram_conditioning:
    if not isinstance(args['pitch_histogram'][0], list):
      args['pitch_histogram'] = [args['pitch_histogram']]
  if self.optional_conditioning:
    if not isinstance(args['disable_conditioning'], list):
      args['disable_conditioning'] = [args['disable_conditioning']]

  # Normalize every pitch class histogram so that it sums to one.
  if self.pitch_histogram_conditioning:
    histograms = args['pitch_histogram']
    for index, histogram in enumerate(histograms):
      total = sum(histogram)
      if total > 0:
        histograms[index] = [float(count) / total for count in histogram]
      else:
        tf.logging.warning('Pitch histogram is empty, using default.')
        histograms[index] = DEFAULT_PITCH_HISTOGRAM

  total_steps = performance.num_steps + (end_step - start_step)

  # Replace the raw conditioning values in `args` with functions that map a
  # generation step to note density, pitch histogram and disable flag.
  mean_note_density = DEFAULT_NOTE_DENSITY
  if self.note_density_conditioning:
    densities = args.pop('note_density')
    args['note_density_fn'] = partial(
        _step_to_note_density,
        num_steps=total_steps,
        note_densities=densities)
    mean_note_density = sum(densities) / len(densities)
  if self.pitch_histogram_conditioning:
    args['pitch_histogram_fn'] = partial(
        _step_to_pitch_histogram,
        num_steps=total_steps,
        pitch_histograms=args.pop('pitch_histogram'))
  if self.optional_conditioning:
    args['disable_conditioning_fn'] = partial(
        _step_to_disable_conditioning,
        num_steps=total_steps,
        disable_conditioning_flags=args.pop('disable_conditioning'))

  if not performance:
    # The primer is empty, so start from silence.
    performance.set_length(min(performance_lib.MAX_SHIFT_STEPS, total_steps))

  while performance.num_steps < total_steps:
    # Estimate using the mean requested (or default) note density and 4 RNN
    # steps per note. The real count is only known after generation because
    # the number of notes per quantized step varies.
    note_density = max(1.0, mean_note_density)
    remaining = total_steps - performance.num_steps
    rnn_steps = int(math.ceil(
        4.0 * note_density * remaining / self.steps_per_second))
    tf.logging.info(
        'Need to generate %d more steps for this sequence, will try asking '
        'for %d RNN steps' % (remaining, rnn_steps))
    performance = self._model.generate_performance(
        len(performance) + rnn_steps, performance, **args)

    if not self.fill_generate_section:
      # For speed, run the loop only once even though this may leave the
      # generate section partially filled.
      break

  performance.set_length(total_steps)

  generated_sequence = performance.to_sequence(
      max_note_duration=self.max_note_duration)

  assert (generated_sequence.total_time - target.end_time) <= 1e-5
  return generated_sequence
def _generate(self, input_sequence, generator_options):
  """Extends the primer performance with optional density/pitch conditioning.

  Args:
    input_sequence: The NoteSequence used as the primer.
    generator_options: Options whose `input_sections`, `generate_sections`
      and `args` fields are read here.

  Returns:
    The sequence returned by `performance.to_sequence(...)` after generation.

  Raises:
    mm.SequenceGeneratorException: If more than one input section is given,
      if the number of generate sections is not exactly one, or if the last
      primer note ends after the generate section starts.
  """
  input_sections = generator_options.input_sections
  if len(input_sections) > 1:
    raise mm.SequenceGeneratorException(
        'This model supports at most one input_sections message, but got %s' %
        len(input_sections))
  generate_sections = generator_options.generate_sections
  if len(generate_sections) != 1:
    raise mm.SequenceGeneratorException(
        'This model supports only 1 generate_sections message, but got %s' %
        len(generate_sections))
  target = generate_sections[0]

  if input_sections:
    primer_section = input_sections[0]
    primer_sequence = mm.trim_note_sequence(
        input_sequence, primer_section.start_time, primer_section.end_time)
    input_start_step = mm.quantize_to_step(
        primer_section.start_time, self.steps_per_second, quantize_cutoff=0.0)
  else:
    primer_sequence = input_sequence
    input_start_step = 0

  if primer_sequence.notes:
    last_end_time = max(note.end_time for note in primer_sequence.notes)
  else:
    last_end_time = 0
  if last_end_time > target.start_time:
    raise mm.SequenceGeneratorException(
        'Got GenerateSection request for section that is before or equal to '
        'the end of the NoteSequence. This model can only extend sequences. '
        'Requested start time: %s, Final note end time: %s' %
        (target.start_time, last_end_time))

  # Quantize the priming sequence.
  quantized_primer = mm.quantize_note_sequence_absolute(
      primer_sequence, self.steps_per_second)
  extracted_perfs, _ = performance_lib.extract_performances(
      quantized_primer,
      start_step=input_start_step,
      num_velocity_bins=self.num_velocity_bins)
  assert len(extracted_perfs) <= 1

  start_step = mm.quantize_to_step(
      target.start_time, self.steps_per_second, quantize_cutoff=0.0)
  # The end step uses quantize_cutoff=1.0 so it always rounds down. This
  # avoids generating a sequence that ends at 5.0 seconds when the requested
  # end time is 4.99.
  end_step = mm.quantize_to_step(
      target.end_time, self.steps_per_second, quantize_cutoff=1.0)

  if not (extracted_perfs and extracted_perfs[0]):
    # Nothing could be extracted: begin with an empty track that starts at
    # the requested generation start step.
    performance = performance_lib.Performance(
        steps_per_second=quantized_primer.quantization_info.steps_per_second,
        start_step=start_step,
        num_velocity_bins=self.num_velocity_bins)
  else:
    performance = extracted_perfs[0]

  # Make the track reach the step at which generation should begin.
  performance.set_length(start_step - performance.start_step)

  def parse_literal(arg):
    """Evaluates the argument's string value as a Python literal."""
    return ast.literal_eval(arg.string_value)

  # Pull the supported generation arguments out of the generator options.
  arg_types = {
      'note_density': parse_literal,
      'pitch_histogram': parse_literal,
      'disable_conditioning': parse_literal,
      'temperature': lambda arg: arg.float_value,
      'beam_size': lambda arg: arg.int_value,
      'branch_factor': lambda arg: arg.int_value,
      'steps_per_iteration': lambda arg: arg.int_value
  }
  requested = generator_options.args
  args = {
      name: parse(requested[name])
      for name, parse in arg_types.items()
      if name in requested
  }

  # Note density must be present exactly when conditioning on it.
  if self.note_density_conditioning:
    if 'note_density' not in args:
      tf.logging.warning(
          'Conditioning on note density but none requested, using default.')
      args['note_density'] = [DEFAULT_NOTE_DENSITY]
  elif 'note_density' in args:
    tf.logging.warning(
        'Not conditioning on note density, ignoring requested density.')
    del args['note_density']

  # The pitch class histogram must be present exactly when conditioning on
  # it.
  if self.pitch_histogram_conditioning:
    if 'pitch_histogram' not in args:
      tf.logging.warning(
          'Conditioning on pitch histogram but none requested, using default.')
      args['pitch_histogram'] = [DEFAULT_PITCH_HISTOGRAM]
  elif 'pitch_histogram' in args:
    tf.logging.warning(
        'Not conditioning on pitch histogram, ignoring requested histogram.')
    del args['pitch_histogram']

  # The disable flag must be present exactly when conditioning is optional.
  if self.optional_conditioning:
    if 'disable_conditioning' not in args:
      args['disable_conditioning'] = [False]
  elif 'disable_conditioning' in args:
    tf.logging.warning(
        'No optional conditioning, ignoring disable conditioning flag.')
    del args['disable_conditioning']

  # Wrap a lone density, histogram or disable flag in a list so the code
  # below only has to deal with lists.
  if self.note_density_conditioning:
    if not isinstance(args['note_density'], list):
      args['note_density'] = [args['note_density']]
  if self.pitch_histogram_conditioning:
    if not isinstance(args['pitch_histogram'][0], list):
      args['pitch_histogram'] = [args['pitch_histogram']]
  if self.optional_conditioning:
    if not isinstance(args['disable_conditioning'], list):
      args['disable_conditioning'] = [args['disable_conditioning']]

  # Normalize every pitch class histogram so that it sums to one.
  if self.pitch_histogram_conditioning:
    histograms = args['pitch_histogram']
    for index, histogram in enumerate(histograms):
      total = sum(histogram)
      if total > 0:
        histograms[index] = [float(count) / total for count in histogram]
      else:
        tf.logging.warning('Pitch histogram is empty, using default.')
        histograms[index] = DEFAULT_PITCH_HISTOGRAM

  total_steps = performance.num_steps + (end_step - start_step)

  # Replace the raw conditioning values in `args` with functions that map a
  # generation step to note density, pitch histogram and disable flag.
  mean_note_density = DEFAULT_NOTE_DENSITY
  if self.note_density_conditioning:
    densities = args.pop('note_density')
    args['note_density_fn'] = partial(
        _step_to_note_density,
        num_steps=total_steps,
        note_densities=densities)
    mean_note_density = sum(densities) / len(densities)
  if self.pitch_histogram_conditioning:
    args['pitch_histogram_fn'] = partial(
        _step_to_pitch_histogram,
        num_steps=total_steps,
        pitch_histograms=args.pop('pitch_histogram'))
  if self.optional_conditioning:
    args['disable_conditioning_fn'] = partial(
        _step_to_disable_conditioning,
        num_steps=total_steps,
        disable_conditioning_flags=args.pop('disable_conditioning'))

  if not performance:
    # The primer is empty, so start from silence.
    performance.set_length(min(performance_lib.MAX_SHIFT_STEPS, total_steps))

  while performance.num_steps < total_steps:
    # Estimate using the mean requested (or default) note density and 4 RNN
    # steps per note. The real count is only known after generation because
    # the number of notes per quantized step varies.
    note_density = max(1.0, mean_note_density)
    remaining = total_steps - performance.num_steps
    rnn_steps = int(math.ceil(
        4.0 * note_density * remaining / self.steps_per_second))
    tf.logging.info(
        'Need to generate %d more steps for this sequence, will try asking '
        'for %d RNN steps' % (remaining, rnn_steps))
    performance = self._model.generate_performance(
        len(performance) + rnn_steps, performance, **args)

    if not self.fill_generate_section:
      # For speed, run the loop only once even though this may leave the
      # generate section partially filled.
      break

  performance.set_length(total_steps)

  generated_sequence = performance.to_sequence(
      max_note_duration=self.max_note_duration)

  assert (generated_sequence.total_time - target.end_time) <= 1e-5
  return generated_sequence
def _to_tensors(self, note_sequence):
  """Converts NoteSequence to unique, one-hot tensor sequences.

  The sequence is quantized, event lists are extracted (optionally padded and
  sliced), paired with chords when a chord encoding is configured,
  de-duplicated, passed through `_maybe_sample_outputs`, and finally encoded
  with `np_onehot`.

  Args:
    note_sequence: The NoteSequence to convert.

  Returns:
    A `ConverterTensors` whose `inputs` and `outputs` are the same list of
    encoded event sequences and whose `controls` holds the encoded chord
    sequences (empty when no chord encoding is configured). An empty
    `ConverterTensors()` is returned when quantization fails, the bar length
    does not match `self._steps_per_bar`, chords cannot be inferred, extracted
    or encoded, or there is nothing left to encode.
  """
  try:
    if self._steps_per_quarter:
      quantized_sequence = mm.quantize_note_sequence(
          note_sequence, self._steps_per_quarter)
      if (mm.steps_per_bar_in_quantized_sequence(quantized_sequence) !=
          self._steps_per_bar):
        return ConverterTensors()
    else:
      quantized_sequence = mm.quantize_note_sequence_absolute(
          note_sequence, self._steps_per_second)
  except (mm.BadTimeSignatureException, mm.NonIntegerStepsPerBarException,
          mm.NegativeTimeException):
    return ConverterTensors()

  if self._chord_encoding and not any(
      ta.annotation_type == CHORD_SYMBOL
      for ta in quantized_sequence.text_annotations):
    # We are conditioning on chords but sequence does not have chords. Try to
    # infer them.
    try:
      mm.infer_chords_for_sequence(quantized_sequence)
    except mm.ChordInferenceException:
      return ConverterTensors()

  event_lists, _ = self._event_extractor_fn(quantized_sequence)

  if self._pad_to_total_time:
    for events in event_lists:
      # Pad on the left up to the start step first, then on the right up to
      # the full quantized length of the sequence.
      events.set_length(len(events) + events.start_step, from_left=True)
      events.set_length(quantized_sequence.total_quantized_steps)

  if self._slice_steps:
    # Windows of `_slice_steps` events, advancing by `_steps_per_bar`.
    sliced_event_lists = [
        events[end - self._slice_steps:end]
        for events in event_lists
        for end in range(
            self._slice_steps, len(events) + 1, self._steps_per_bar)]
  else:
    sliced_event_lists = event_lists

  if self._chord_encoding:
    try:
      sliced_chord_lists = chords_lib.event_list_chords(
          quantized_sequence, sliced_event_lists)
    except chords_lib.CoincidentChordsException:
      return ConverterTensors()
    # Materialize the (event, chord) pairs so they are not one-shot iterators.
    sliced_event_lists = [
        list(zip(events, chords))
        for events, chords in zip(sliced_event_lists, sliced_chord_lists)]

  # TODO(adarob): Consider handling the fact that different event lists can
  # be mapped to identical tensors by the encoder_decoder (e.g., Drums).

  unique_event_tuples = list({tuple(events) for events in sliced_event_lists})
  unique_event_tuples = self._maybe_sample_outputs(unique_event_tuples)

  if not unique_event_tuples:
    return ConverterTensors()

  has_end_token = self.end_token is not None

  control_seqs = []
  if self._chord_encoding:
    # Each unique tuple holds (event, chord) pairs; split them back into
    # parallel event and chord tuples, dropping empty ones.
    split_pairs = [
        tuple(zip(*pairs)) for pairs in unique_event_tuples if pairs]
    if not split_pairs:
      # Nothing to unpack; bail out instead of failing on the unpacking below.
      return ConverterTensors()
    unique_event_tuples, unique_chord_tuples = zip(*split_pairs)

    for chords in unique_chord_tuples:
      try:
        chord_tokens = [
            self._chord_encoding.encode_event(chord) for chord in chords]
        # Use the same `is not None` test as the event sequences below so an
        # end token of 0 still keeps controls and outputs the same length.
        if has_end_token:
          # Repeat the last chord instead of using a special token; otherwise
          # the model may learn to rely on the special token to detect
          # endings.
          chord_tokens.append(
              chord_tokens[-1] if chord_tokens
              else self._chord_encoding.encode_event(mm.NO_CHORD))
      except (mm.ChordSymbolException, mm.ChordEncodingException):
        return ConverterTensors()
      control_seqs.append(
          np_onehot(chord_tokens, self.control_depth, self.control_dtype))

  end_tokens = [self.end_token] if has_end_token else []
  seqs = [
      np_onehot(
          [self._legacy_encoder_decoder.encode_event(event)
           for event in events] + end_tokens,
          self.output_depth, self.output_dtype)
      for events in unique_event_tuples]

  return ConverterTensors(inputs=seqs, outputs=seqs, controls=control_seqs)