def create_example(example_id, ns, wav_data, velocity_range=None):
    """Builds the tf.train.Example used for training or testing.

    The example carries four bytes features: 'id' (the UTF-8 encoded
    example id), 'sequence' (the serialized, sustain-processed
    NoteSequence), 'audio' (wav_data as given) and 'velocity_range' (the
    serialized velocity range).

    Args:
      example_id: Text identifier of the example; encoded as UTF-8.
      ns: The NoteSequence to store. Sustain control changes are applied
        before serialization.
      wav_data: The audio payload, stored unchanged as a bytes feature.
      velocity_range: Optional velocity range proto. When None it is derived
        from `ns` with velocity_range_from_sequence.

    Returns:
      The assembled tf.train.Example.
    """
    if velocity_range is None:
        velocity_range = velocity_range_from_sequence(ns)

    # Every sequence used for training and evaluation must have gone through
    # sustain processing, so it is applied here unconditionally.
    sustained = sequences_lib.apply_sustain_control_changes(ns)

    def _bytes_feature(payload):
        # Wraps a single bytes value in a tf.train.Feature.
        return tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[payload]))

    feature_map = {
        'id': _bytes_feature(example_id.encode('utf-8')),
        'sequence': _bytes_feature(sustained.SerializeToString()),
        'audio': _bytes_feature(wav_data),
        'velocity_range': _bytes_feature(velocity_range.SerializeToString()),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def testSustainPipeline(self):
    """Checks that SustainPipeline matches apply_sustain_control_changes."""
    note_sequence = common_testing_lib.parse_test_proto(
        music_pb2.NoteSequence,
        """ time_signatures: { numerator: 4 denominator: 4} tempos: { qpm: 60}""")

    # Fixture data for track/instrument 0.
    # NOTE(review): tuple layouts are defined by the testing_lib helpers;
    # presumably (pitch, velocity, start, end) for notes and
    # (time, control number, value) for control changes -- confirm there.
    track_notes = [
        (11, 55, 0.22, 0.50),
        (40, 45, 2.50, 3.50),
        (55, 120, 4.0, 4.01),
    ]
    control_changes = [
        (0.0, 64, 127),
        (0.75, 64, 0),
        (2.0, 64, 127),
        (3.0, 64, 0),
        (3.75, 64, 127),
        (4.5, 64, 127),
        (4.8, 64, 0),
        (4.9, 64, 127),
        (6.0, 64, 0),
    ]
    testing_lib.add_track_to_sequence(note_sequence, 0, track_notes)
    testing_lib.add_control_changes_to_sequence(
        note_sequence, 0, control_changes)

    # The pipeline is expected to produce exactly what the library call does.
    expected = sequences_lib.apply_sustain_control_changes(note_sequence)
    pipeline = note_sequence_pipelines.SustainPipeline()
    self._unit_transform_test(pipeline, note_sequence, [expected])
def convert_midi_to_proto(path, dest_dir):
    """Converts a MIDI file to a serialized, sustain-processed NoteSequence.

    Instruments are dropped when they are drums, when fewer than 5 entries
    of their pitch class histogram exceed 5, or when they have fewer than
    30 notes. The remaining MIDI is converted to a NoteSequence, sustain
    control changes are applied, all control changes are removed, and the
    proto is written to `<dest_dir>/<basename of path>.pb`.

    Args:
      path: Path of the MIDI file to read.
      dest_dir: Directory in which the '.pb' file is written.
    """
    midi = pretty_midi.PrettyMIDI(path)

    def _keep_instrument(inst):
        # True when the instrument passes all three filters described above.
        num_distinct_pitches = sum(
            count > 5 for count in inst.get_pitch_class_histogram())
        return not (inst.is_drum
                    or num_distinct_pitches < 5
                    or len(inst.notes) < 30)

    # Build the filtered list first and assign it back in one step. Removing
    # items from a list while iterating over it skips the element that
    # follows each removal, which let unwanted instruments slip through.
    midi.instruments[:] = [
        inst for inst in midi.instruments if _keep_instrument(inst)
    ]

    ns = mm.midi_to_note_sequence(midi)
    ns = apply_sustain_control_changes(ns)
    # Control changes have been applied above; drop them before writing.
    del ns.control_changes[:]

    out_file = os.path.join(dest_dir, os.path.basename(path)) + '.pb'
    with open(out_file, 'wb') as f:
        f.write(ns.SerializeToString())
def sequence_to_pianoroll_fn(sequence_tensor, velocity_range_tensor):
    """Turns a serialized NoteSequence into its pianoroll arrays.

    Args:
      sequence_tensor: Serialized music_pb2.NoteSequence.
      velocity_range_tensor: Serialized music_pb2.VelocityRange.

    Returns:
      A tuple (active, weights, onsets, onset_velocities, offsets) taken
      from the pianoroll of the sustain-processed sequence.
    """
    velocity_range = music_pb2.VelocityRange.FromString(velocity_range_tensor)
    parsed = music_pb2.NoteSequence.FromString(sequence_tensor)
    sustained = sequences_lib.apply_sustain_control_changes(parsed)

    # `hparams` is not a parameter; it comes from the enclosing scope.
    pianoroll_options = dict(
        frames_per_second=hparams_frames_per_second(hparams),
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH,
        min_frame_occupancy_for_label=hparams.min_frame_occupancy_for_label,
        onset_mode=hparams.onset_mode,
        onset_length_ms=hparams.onset_length,
        offset_length_ms=hparams.offset_length,
        onset_delay_ms=hparams.onset_delay,
        min_velocity=velocity_range.min,
        max_velocity=velocity_range.max,
    )
    roll = sequences_lib.sequence_to_pianoroll(sustained, **pianoroll_options)

    return (
        roll.active,
        roll.weights,
        roll.onsets,
        roll.onset_velocities,
        roll.offsets,
    )
def truncate_note_sequence(sequence, truncate_secs):
    """Truncates a NoteSequence to the given length.

    Sustain control changes are applied first. Notes that start after
    `truncate_secs` are dropped, and notes that end after it are clipped to
    end at `truncate_secs`. Only notes are copied into the result.

    Args:
      sequence: The NoteSequence to truncate. It is passed to
        apply_sustain_control_changes and the result is read from.
      truncate_secs: Cut-off time in seconds.

    Returns:
      A new NoteSequence containing the kept notes. Its total_time is the
      latest end time among those notes, and is left unset when no note is
      kept.
    """
    sus_sequence = sequences_lib.apply_sustain_control_changes(sequence)

    truncated_seq = music_pb2.NoteSequence()
    for note in sus_sequence.notes:
        if note.start_time > truncate_secs:
            continue
        end_time = note.end_time
        if end_time > truncate_secs:
            end_time = truncate_secs

        modified_note = truncated_seq.notes.add()
        modified_note.MergeFrom(note)
        modified_note.end_time = end_time

    if truncated_seq.notes:
        # The note stored last is not necessarily the one that ends last (an
        # earlier, longer note can outlast it), so take the maximum end time
        # instead of notes[-1].end_time.
        truncated_seq.total_time = max(
            note.end_time for note in truncated_seq.notes)
    return truncated_seq
def mix_sequences(individual_samples, sample_rate, individual_sequences):
    """Mix multiple audio/notesequence pairs together.

    All sequences will be repeated until they are as long as the longest
    sequence.

    Note that the mixed sequence will contain only the (sustain-processed)
    notes from the individual sequences. All other control changes and
    metadata will not be preserved.

    Side effects: entries of `individual_samples` are replaced in place by
    their normalized (and possibly zero-padded) versions, and note
    velocities of the given sequences are rescaled in place.

    Args:
      individual_samples: A list of audio samples to mix.
      sample_rate: Rate at which to interpret the samples
      individual_sequences: A list of NoteSequences to mix.

    Returns:
      mixed_samples: The mixed audio.
      mixed_sequence: The mixed NoteSequence.
    """
    # Normalize samples and sequence velocities before mixing.
    # This ensures that the velocities/loudness of the individual samples
    # are treated equally.
    for i, samples in enumerate(individual_samples):
        individual_samples[i] = librosa.util.normalize(samples, norm=np.inf)
    for sequence in individual_sequences:
        velocities = [note.velocity for note in sequence.notes]
        if not velocities:
            # A sequence without notes has nothing to rescale; np.max would
            # raise on the empty list.
            continue
        velocity_max = np.max(velocities)
        if velocity_max <= 0:
            # All velocities are zero: scaling would divide by zero.
            continue
        for note in sequence.notes:
            note.velocity = int(
                (note.velocity / velocity_max) * constants.MAX_MIDI_VELOCITY)

    # Ensure that samples are always at least as long as their paired
    # sequences.
    for i, (samples, sequence) in enumerate(
            zip(individual_samples, individual_sequences)):
        if len(samples) / sample_rate < sequence.total_time:
            padding = int(math.ceil(
                (sequence.total_time - len(samples) / sample_rate)
                * sample_rate))
            individual_samples[i] = np.pad(samples, [0, padding], 'constant')

    # Repeat each ns/wav pair to be as long as the longest wav.
    max_duration = np.max([len(s) for s in individual_samples]) / sample_rate

    extended_samples = []
    extended_sequences = []
    for samples, sequence in zip(individual_samples, individual_sequences):
        extended_samples.append(
            audio_io.repeat_samples_to_duration(
                samples, sample_rate, max_duration))
        extended_sequences.append(
            sequences_lib.repeat_sequence_to_duration(
                sequence, max_duration,
                sequence_duration=len(samples) / sample_rate))

    # Mix samples and sequences together. Each source contributes an equal
    # share of the mixed audio.
    mixed_samples = np.zeros_like(extended_samples[0])
    for samples in extended_samples:
        mixed_samples += samples / len(extended_samples)

    mixed_sequence = music_pb2.NoteSequence()
    mixed_sequence.ticks_per_quarter = constants.STANDARD_PPQ
    for sequence in extended_sequences:
        # Process sustain changes before copying notes.
        sus_sequence = sequences_lib.apply_sustain_control_changes(sequence)
        if sus_sequence.total_time > mixed_sequence.total_time:
            mixed_sequence.total_time = sus_sequence.total_time
        # TODO(fjord): Manage instrument/program numbers.
        mixed_sequence.notes.extend(sus_sequence.notes)

    return mixed_samples, mixed_sequence
def find_split_points(note_sequence, samples, sample_rate, min_length,
                      max_length):
    """Returns times at which there are no notes.

    The general strategy employed is to first check if there are places in
    the sustained pianoroll where no notes are active within the max_length
    window; if so the middle of the last gap is chosen as the split point.

    If not, then it checks if there are places in the pianoroll without
    sustain where no notes are active and then finds last zero crossing of
    the wav file and chooses that as the split point.

    If neither of those is true, then it chooses the last zero crossing
    within the max_length window as the split point.

    If there are no zero crossings in the entire window, then it basically
    gives up and advances time forward by max_length.

    Args:
      note_sequence: The NoteSequence to split.
      samples: The audio file as samples.
      sample_rate: The sample rate (samples/second) of the audio file.
      min_length: Minimum number of seconds in a split.
      max_length: Maximum number of seconds in a split.

    Returns:
      A list of split points in seconds from the beginning of the file. The
      list is empty when the sequence has no notes.

    Raises:
      AssertionError: If the generated split points violate the min/max
        length constraints.
    """
    if not note_sequence.notes:
        return []

    end_time = note_sequence.total_time

    note_sequence_sustain = sequences_lib.apply_sustain_control_changes(
        note_sequence)

    ranges_nosustain = find_inactive_ranges(note_sequence)
    ranges_sustain = find_inactive_ranges(note_sequence_sustain)

    # Separate start/end lists so the ends can be searched with bisect.
    nosustain_starts = [x[0] for x in ranges_nosustain]
    sustain_starts = [x[0] for x in ranges_sustain]

    nosustain_ends = [x[1] for x in ranges_nosustain]
    sustain_ends = [x[1] for x in ranges_sustain]

    split_points = [0.]

    while end_time - split_points[-1] > max_length:
        max_advance = split_points[-1] + max_length

        # check for interval in sustained sequence
        pos = bisect.bisect_right(sustain_ends, max_advance)
        if pos < len(sustain_starts) and max_advance > sustain_starts[pos]:
            split_points.append(max_advance)

        # if no interval, or we didn't fit, try the unmodified sequence
        elif (pos == 0
              or sustain_starts[pos - 1] <= split_points[-1] + min_length):
            # no splits available, use non sustain notes and find close zero
            # crossing
            pos = bisect.bisect_right(nosustain_ends, max_advance)

            if (pos < len(nosustain_starts)
                    and max_advance > nosustain_starts[pos]):
                # we fit, great, try to split at a zero crossing
                zxc_start = nosustain_starts[pos]
                zxc_end = max_advance
                last_zero_xing = _last_zero_crossing(
                    samples,
                    int(math.floor(zxc_start * sample_rate)),
                    int(math.ceil(zxc_end * sample_rate)))
                # A falsy result is treated as "no crossing found".
                if last_zero_xing:
                    last_zero_xing = float(last_zero_xing) / sample_rate
                    split_points.append(last_zero_xing)
                else:
                    # give up and just return where there are at least no
                    # notes
                    split_points.append(max_advance)

            else:
                # there are no good places to cut, so just pick the last zero
                # crossing; check the entire valid range for zero crossings
                start_sample = int(
                    math.ceil((split_points[-1] + min_length) * sample_rate)
                ) + 1
                # Cast to int: with float lengths or sample rate the window
                # size is a float, which is not a valid sample index.
                end_sample = start_sample + int(
                    (max_length - min_length) * sample_rate)
                last_zero_xing = _last_zero_crossing(
                    samples, start_sample, end_sample)

                if last_zero_xing:
                    last_zero_xing = float(last_zero_xing) / sample_rate
                    split_points.append(last_zero_xing)
                else:
                    # give up and advance by max amount
                    split_points.append(max_advance)

        else:
            # only advance as far as max_length
            new_time = min(np.mean(ranges_sustain[pos - 1]), max_advance)
            split_points.append(new_time)

    if split_points[-1] != end_time:
        split_points.append(end_time)

    # ensure that we've generated a valid sequence of splits
    for prev, curr in zip(split_points[:-1], split_points[1:]):
        assert curr > prev
        assert curr - prev <= max_length + 1e-8
        if curr < end_time:
            assert curr - prev >= min_length - 1e-8
    assert end_time - split_points[-1] < max_length
    return split_points
def process(self, kv):
    """Yields fixed-length performance examples for one keyed NoteSequence.

    The sequence is sustain-processed and stripped of control changes, then
    each augmentation function is applied `self._num_replications` times.
    Encoded performances shorter than 2048 tokens are skipped; longer ones
    are randomly cropped to 2048 tokens. When `self._melody` is set, a
    melody is inferred from the cropped performance and encoded under
    'melody', 'performance' is added (a pitch-shifted copy when
    `self._noisy` is set, otherwise the targets) and 'inputs' is removed.

    Args:
      kv: A (key, serialized NoteSequence) pair. The key may be text or
        bytes; it seeds the random number generator.

    Yields:
      The result of generator_utils.to_example for each example dict.
    """
    # Seed random number generator based on key so that hop times are
    # deterministic.
    key, ns_str = kv
    # hashlib requires bytes, so text keys are encoded first (matching the
    # sibling process implementation); bytes keys are used as they are.
    key_bytes = key if isinstance(key, bytes) else key.encode('utf-8')
    m = hashlib.md5(key_bytes)
    random.seed(int(m.hexdigest(), 16))

    # Deserialize NoteSequence proto.
    ns = note_seq.NoteSequence.FromString(ns_str)

    # Apply sustain pedal.
    ns = sequences_lib.apply_sustain_control_changes(ns)

    # Remove control changes as there are potentially a lot of them and they
    # are no longer needed.
    del ns.control_changes[:]

    for _ in range(self._num_replications):
        for augment_fn in self._augment_fns:
            # Augment and encode the performance.
            try:
                augmented_performance_sequence = augment_fn(ns)
            except DataAugmentationError:
                Metrics.counter(
                    'extract_examples', 'augment_performance_failed').inc()
                continue
            seq = self._encode_performance_fn(augmented_performance_sequence)

            # feed in performance as both input/output to music transformer
            # chopping sequence into length 2048 (throw out shorter
            # sequences)
            if len(seq) < 2048:
                continue
            max_offset = len(seq) - 2048
            offset = random.randrange(max_offset + 1)
            cropped_seq = seq[offset:offset + 2048]

            example_dict = {
                'inputs': cropped_seq,
                'targets': cropped_seq
            }

            if self._melody:
                # decode truncated performance sequence for melody inference
                decoded_midi = self._decode_performance_fn(cropped_seq)
                decoded_ns = note_seq.midi_io.midi_file_to_note_sequence(
                    decoded_midi)

                # extract melody from cropped performance sequence
                melody_instrument = melody_inference.infer_melody_for_sequence(
                    decoded_ns,
                    melody_interval_scale=2.0,
                    rest_prob=0.1,
                    instantaneous_non_max_pitch_prob=1e-15,
                    instantaneous_non_empty_rest_prob=0.0,
                    instantaneous_missing_pitch_prob=1e-15)

                # remove non-melody notes from score
                score_sequence = copy.deepcopy(decoded_ns)
                score_notes = [
                    note for note in score_sequence.notes
                    if note.instrument == melody_instrument
                ]
                del score_sequence.notes[:]
                score_sequence.notes.extend(score_notes)

                # encode melody
                encode_score_fn = self._encode_score_fns['melody']
                example_dict['melody'] = encode_score_fn(score_sequence)

                # make sure performance input also matches targets; needed
                # for compatibility of both perf and (mel & perf)
                # autoencoders
                if self._noisy:
                    # randomly sample a pitch shift to construct noisy
                    # performance
                    all_pitches = [x.pitch for x in decoded_ns.notes]
                    min_val = min(all_pitches)
                    max_val = max(all_pitches)
                    # Build a real list without 0 so that a transposition
                    # always happens. A range object has no remove() on
                    # Python 3, so the old remove(0) call raised
                    # AttributeError.
                    # NOTE(review): if 0 was the only candidate the list is
                    # empty and random.choice raises IndexError.
                    transpose_range = [
                        shift for shift in range(
                            -(min_val - 21), 108 - max_val + 1)
                        if shift != 0
                    ]
                    transpose_amount = random.choice(transpose_range)
                    augmented_ns, _ = sequences_lib.transpose_note_sequence(
                        decoded_ns,
                        transpose_amount,
                        min_allowed_pitch=21,
                        max_allowed_pitch=108,
                        in_place=False)
                    aug_seq = self._encode_performance_fn(augmented_ns)
                    example_dict['performance'] = aug_seq
                else:
                    example_dict['performance'] = example_dict['targets']
                del example_dict['inputs']

            Metrics.counter('extract_examples', 'encoded_example').inc()
            Metrics.distribution(
                'extract_examples', 'performance_length_in_seconds').update(
                    int(augmented_performance_sequence.total_time))

            yield generator_utils.to_example(example_dict)
def process(self, kv):
    """Yields encoded examples for one (key, serialized NoteSequence) pair.

    The sequence is sustain-processed, stripped of control changes and, if
    hop sizes are configured, split into segments. When score encoders are
    configured, a melody is inferred for each segment; melody notes form the
    score and are removed from the performance. Every augmentation function
    is then applied and the encoded result is yielded.

    Args:
      kv: A (key, serialized NoteSequence) pair. The key is UTF-8 encoded
        and hashed to seed the random number generator.

    Yields:
      The result of generator_utils.to_example for each example dict.
    """
    key, ns_str = kv

    # Seeding from the key keeps the sampled hop times deterministic.
    key_digest = hashlib.md5(key.encode('utf-8')).hexdigest()
    random.seed(int(key_digest, 16))

    # Deserialize, apply the sustain pedal, then drop the control changes:
    # there can be a lot of them and they are no longer needed.
    ns = sequences_lib.apply_sustain_control_changes(
        note_seq.NoteSequence.FromString(ns_str))
    del ns.control_changes[:]

    min_hop = self._min_hop_size_seconds
    max_hop = self._max_hop_size_seconds

    if min_hop and ns.total_time < min_hop:
        Metrics.counter('extract_examples', 'sequence_too_short').inc()
        return

    sequences = []
    for _ in range(self._num_replications):
        if not max_hop:
            # No hop size configured: use the whole sequence.
            sequences.append(ns)
        elif max_hop == min_hop:
            # Split using fixed hop size.
            sequences.extend(sequences_lib.split_note_sequence(ns, max_hop))
        else:
            # Sample random hop positions such that each segment size is
            # within the specified range.
            hop_times = [0.0]
            while hop_times[-1] <= ns.total_time - min_hop:
                if hop_times[-1] + max_hop < ns.total_time:
                    # A valid hop size is required here, since the remainder
                    # of the sequence is too long.
                    max_offset = min(
                        max_hop, ns.total_time - min_hop - hop_times[-1])
                else:
                    # The next hop time may be invalid, in which case the
                    # loop simply stops.
                    max_offset = max_hop
                offset = random.uniform(min_hop, max_offset)
                hop_times.append(hop_times[-1] + offset)
            # Split at the chosen hop times (ignoring zero and the final
            # invalid time).
            sequences.extend(
                sequences_lib.split_note_sequence(ns, hop_times[1:-1]))

    for performance_sequence in sequences:
        if self._encode_score_fns:
            # A score has to be extracted.
            if not self._absolute_timing:
                # Beats are required to extract a score with metric timing.
                beats = [
                    ta for ta in performance_sequence.text_annotations
                    if ta.annotation_type == BEAT
                    and ta.time <= performance_sequence.total_time
                ]
                if len(beats) < 2:
                    Metrics.counter(
                        'extract_examples', 'not_enough_beats').inc()
                    continue

                # Ensure the sequence starts and ends on a beat.
                first_beat = min(beat.time for beat in beats)
                last_beat = max(beat.time for beat in beats)
                performance_sequence = sequences_lib.extract_subsequence(
                    performance_sequence,
                    start_time=first_beat,
                    end_time=last_beat)

                # Infer beat-aligned chords (only for relative timing).
                try:
                    chord_inference.infer_chords_for_sequence(
                        performance_sequence,
                        chord_change_prob=0.25,
                        chord_note_concentration=50.0,
                        add_key_signatures=True)
                except chord_inference.ChordInferenceError:
                    Metrics.counter(
                        'extract_examples', 'chord_inference_failed').inc()
                    continue

            # Infer melody regardless of relative/absolute timing.
            try:
                melody_instrument = melody_inference.infer_melody_for_sequence(
                    performance_sequence,
                    melody_interval_scale=2.0,
                    rest_prob=0.1,
                    instantaneous_non_max_pitch_prob=1e-15,
                    instantaneous_non_empty_rest_prob=0.0,
                    instantaneous_missing_pitch_prob=1e-15)
            except melody_inference.MelodyInferenceError:
                Metrics.counter(
                    'extract_examples', 'melody_inference_failed').inc()
                continue

            if self._absolute_timing:
                # Score uses same timing as performance.
                score_sequence = copy.deepcopy(performance_sequence)
            else:
                # Rectify detected beats to occur at fixed tempo.
                # TODO(iansimon): also include the alignment
                score_sequence, _ = sequences_lib.rectify_beats(
                    performance_sequence, beats_per_minute=SCORE_BPM)

            # Remove melody notes from performance.
            non_melody_notes = [
                note for note in performance_sequence.notes
                if note.instrument != melody_instrument
            ]
            del performance_sequence.notes[:]
            performance_sequence.notes.extend(non_melody_notes)

            # Remove non-melody notes from score.
            melody_notes = [
                note for note in score_sequence.notes
                if note.instrument == melody_instrument
            ]
            del score_sequence.notes[:]
            score_sequence.notes.extend(melody_notes)

            # Remove key signatures and beat/chord annotations from
            # performance.
            del performance_sequence.key_signatures[:]
            del performance_sequence.text_annotations[:]

            Metrics.counter('extract_examples', 'extracted_score').inc()

        for augment_fn in self._augment_fns:
            # Augment and encode the performance.
            try:
                augmented_performance_sequence = augment_fn(
                    performance_sequence)
            except DataAugmentationError:
                Metrics.counter(
                    'extract_examples', 'augment_performance_failed').inc()
                continue

            targets = self._encode_performance_fn(
                augmented_performance_sequence)
            if not targets:
                Metrics.counter(
                    'extract_examples', 'skipped_empty_targets').inc()
                continue

            crop_length = self._random_crop_length
            if crop_length and len(targets) > crop_length:
                # Take a random crop of the encoded performance.
                crop_start = random.randrange(len(targets) - crop_length + 1)
                targets = targets[crop_start:crop_start + crop_length]

            example_dict = {'targets': targets}

            if self._encode_score_fns:
                # Augment the extracted score.
                try:
                    augmented_score_sequence = augment_fn(score_sequence)
                except DataAugmentationError:
                    Metrics.counter(
                        'extract_examples', 'augment_score_failed').inc()
                    continue

                # Apply all score encoding functions; stop at the first one
                # that produces an empty encoding.
                all_encoded = True
                for name, encode_score_fn in self._encode_score_fns.items():
                    example_dict[name] = encode_score_fn(
                        augmented_score_sequence)
                    if not example_dict[name]:
                        Metrics.counter(
                            'extract_examples',
                            'skipped_empty_%s' % name).inc()
                        all_encoded = False
                        break
                if not all_encoded:
                    continue

            Metrics.counter('extract_examples', 'encoded_example').inc()
            Metrics.distribution(
                'extract_examples', 'performance_length_in_seconds').update(
                    int(augmented_performance_sequence.total_time))

            yield generator_utils.to_example(example_dict)
def transform(self, note_sequence):
    """Applies sustain control changes to `note_sequence`.

    Args:
      note_sequence: The sequence handed to
        sequences_lib.apply_sustain_control_changes.

    Returns:
      A one-element list holding the sustain-processed sequence.
    """
    sustained = sequences_lib.apply_sustain_control_changes(note_sequence)
    return [sustained]
def transform(self, input_object):
    """Applies sustain control changes to the given input.

    Args:
      input_object: The note sequence handed to
        sequences_lib.apply_sustain_control_changes.

    Returns:
      A one-element list holding the sustain-processed sequence.
    """
    return [sequences_lib.apply_sustain_control_changes(input_object)]