def setUp(self):
  """Build a minimal PerformanceRnnConfig for the pipeline tests."""
  super(PerformancePipelineTest, self).setUp()
  # A bare one-hot performance encoder/decoder with default HParams is
  # enough to exercise the pipeline; no generator details are needed.
  encoder_decoder = note_seq.OneHotEventSequenceEncoderDecoder(
      note_seq.PerformanceOneHotEncoding())
  self.config = performance_model.PerformanceRnnConfig(
      None, encoder_decoder, contrib_training.HParams())
def __init__(self, steps_per_second, num_velocity_bins, min_pitch, max_pitch,
             add_eos=False, ngrams=None):
  """Initialize a MidiPerformanceEncoder object.

  Encodes MIDI with a performance event encoding. Index 0 is reserved for
  padding and never produced; index 1 is used only when `add_eos` is True,
  in which case it terminates every encoded performance.

  If `ngrams` is given, the vocabulary is extended with one new index per
  n-gram over the base performance event vocabulary: encoding replaces
  matching n-grams with the new indices, and decoding expands them back.

  Tensor2Tensor defines no formal encoder interface, but this class mirrors
  the functions of TextEncoder, ImageEncoder, and AudioEncoder.

  Args:
    steps_per_second: Number of quantization steps per second; also the
        number of time-shift events (up to one second).
    num_velocity_bins: Number of quantized velocity bins to use.
    min_pitch: Minimum MIDI pitch to encode.
    max_pitch: Maximum MIDI pitch to encode (inclusive).
    add_eos: Whether to append an EOS event to encoded performances.
    ngrams: Optional list of performance event n-grams (tuples) to assign
        new indices to. Each must have length at least 2 and be pre-offset
        by the number of reserved IDs.

  Raises:
    ValueError: If any n-gram is shorter than 2 events or contains a
        reserved ID.
  """
  self._steps_per_second = steps_per_second
  self._num_velocity_bins = num_velocity_bins
  self._add_eos = add_eos
  self._ngrams = ngrams or []

  # Reject malformed n-grams up front.
  for gram in self._ngrams:
    if len(gram) < 2:
      raise ValueError('All n-grams must have length at least 2.')
    if min(gram) < self.num_reserved_ids:
      raise ValueError('N-grams cannot contain reserved IDs.')

  self._encoding = note_seq.PerformanceOneHotEncoding(
      num_velocity_bins=num_velocity_bins,
      max_shift_steps=steps_per_second,
      min_pitch=min_pitch,
      max_pitch=max_pitch)

  # Trie mapping event tuples to indices: every unigram maps to itself,
  # and each n-gram gets a fresh index past the unigram vocabulary.
  base = self.unigram_vocab_size
  mapping = {(index,): index for index in range(base)}
  for offset, gram in enumerate(self._ngrams):
    mapping[gram] = base + offset
  self._ngrams_trie = pygtrie.Trie(mapping)
def __init__(self,
             num_velocity_bins=0,
             max_tensors_per_notesequence=None,
             hop_size_bars=1,
             chunk_size_bars=1,
             steps_per_quarter=24,
             quarters_per_bar=4,
             min_num_instruments=2,
             max_num_instruments=8,
             min_total_events=8,
             max_events_per_instrument=64,
             min_pitch=performance_lib.MIN_MIDI_PITCH,
             max_pitch=performance_lib.MAX_MIDI_PITCH,
             first_subsequence_only=False,
             chord_encoding=None):
  """Initialize a MultiInstrumentPerformanceConverter.

  Args:
    num_velocity_bins: Number of quantized velocity bins (0 for none).
    max_tensors_per_notesequence: Cap on tensors extracted per
        NoteSequence, forwarded to the base converter.
    hop_size_bars: Bars to advance between extracted subsequences.
    chunk_size_bars: Bars per chunk within a hop.
    steps_per_quarter: Quantization steps per quarter note.
    quarters_per_bar: Quarter notes per bar.
    min_num_instruments: Minimum instruments required in a subsequence.
    max_num_instruments: Maximum instruments allowed in a subsequence.
    min_total_events: Minimum total performance events required.
    max_events_per_instrument: Event cap per instrument track.
    min_pitch: Minimum MIDI pitch to encode.
    max_pitch: Maximum MIDI pitch to encode (inclusive).
    first_subsequence_only: If True, only use the first subsequence of
        each NoteSequence.
    chord_encoding: Optional chord event encoding for conditioning; when
        None, no control signal is used.
  """
  max_shift_steps = (
      performance_lib.DEFAULT_MAX_SHIFT_QUARTERS * steps_per_quarter)
  self._performance_encoding = note_seq.PerformanceOneHotEncoding(
      num_velocity_bins=num_velocity_bins,
      max_shift_steps=max_shift_steps,
      min_pitch=min_pitch,
      max_pitch=max_pitch)
  self._chord_encoding = chord_encoding
  self._num_velocity_bins = num_velocity_bins
  self._hop_size_bars = hop_size_bars
  self._chunk_size_bars = chunk_size_bars
  self._steps_per_quarter = steps_per_quarter
  self._steps_per_bar = steps_per_quarter * quarters_per_bar
  self._min_num_instruments = min_num_instruments
  self._max_num_instruments = max_num_instruments
  self._min_total_events = min_total_events
  self._max_events_per_instrument = max_events_per_instrument
  self._min_pitch = min_pitch
  self._max_pitch = max_pitch
  self._first_subsequence_only = first_subsequence_only

  self._max_num_chunks = hop_size_bars // chunk_size_bars
  self._max_steps_truncate = (
      steps_per_quarter * quarters_per_bar * hop_size_bars)

  # Each encoded track will begin with a program specification token
  # (with one extra program for drums).
  num_program_tokens = (
      note_seq.MAX_MIDI_PROGRAM - note_seq.MIN_MIDI_PROGRAM + 2)
  end_token = self._performance_encoding.num_classes + num_program_tokens
  depth = end_token + 1
  max_lengths = [
      self._max_num_chunks, max_num_instruments, max_events_per_instrument]
  if chord_encoding is None:
    control_depth = 0
    control_pad_token = None
  else:
    control_depth = chord_encoding.num_classes
    control_pad_token = chord_encoding.encode_event(note_seq.NO_CHORD)
  # Use the builtin `bool` as the dtype: the original `np.bool` was a
  # deprecated alias for it (removed in NumPy 1.24), so this is
  # behavior-identical on old NumPy and avoids AttributeError on new.
  super(MultiInstrumentPerformanceConverter, self).__init__(
      input_depth=depth,
      input_dtype=bool,
      output_depth=depth,
      output_dtype=bool,
      control_depth=control_depth,
      control_dtype=bool,
      control_pad_token=control_pad_token,
      end_token=end_token,
      max_lengths=max_lengths,
      max_tensors_per_notesequence=max_tensors_per_notesequence)
super(PerformanceRnnConfig, self).__init__( details, encoder_decoder, hparams) self.num_velocity_bins = num_velocity_bins self.control_signals = control_signals self.optional_conditioning = optional_conditioning self.note_performance = note_performance default_configs = { 'performance': PerformanceRnnConfig( generator_pb2.GeneratorDetails( id='performance', description='Performance RNN'), note_seq.OneHotEventSequenceEncoderDecoder( note_seq.PerformanceOneHotEncoding()), contrib_training.HParams( batch_size=64, rnn_layer_sizes=[512, 512, 512], dropout_keep_prob=1.0, clip_norm=3, learning_rate=0.001)), 'performance_with_dynamics': PerformanceRnnConfig( generator_pb2.GeneratorDetails( id='performance_with_dynamics', description='Performance RNN with dynamics'), note_seq.OneHotEventSequenceEncoderDecoder( note_seq.PerformanceOneHotEncoding(num_velocity_bins=32)), contrib_training.HParams( batch_size=64,