  def setUp(self):
    super(PerformancePipelineTest, self).setUp()
    self.config = performance_model.PerformanceRnnConfig(
        None,
        note_seq.OneHotEventSequenceEncoderDecoder(
            note_seq.PerformanceOneHotEncoding()),
        contrib_training.HParams())
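
For context, the fixture's encoder/decoder pair is self-describing: OneHotEventSequenceEncoderDecoder derives its input and label sizes from the wrapped encoding. A minimal sketch of inspecting it, assuming only that note_seq is installed:

import note_seq

encoding = note_seq.PerformanceOneHotEncoding()
encoder_decoder = note_seq.OneHotEventSequenceEncoderDecoder(encoding)

# Both sizes are derived from the one-hot event vocabulary.
print(encoder_decoder.input_size)   # model input depth
print(encoder_decoder.num_classes)  # softmax label count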
Example #2
  def __init__(self, steps_per_second, num_velocity_bins, min_pitch, max_pitch,
               add_eos=False, ngrams=None):
    """Initialize a MidiPerformanceEncoder object.

    Encodes MIDI using a performance event encoding. Index 0 is unused as it is
    reserved for padding. Index 1 is unused unless `add_eos` is True, in which
    case it is appended to all encoded performances.

    If `ngrams` is specified, the vocabulary is augmented with a set of n-grams
    the original performance event vocabulary. When encoding, these n-grams will
    be replaced with new event indices. When decoding, the new indices will be
    expanded back into the original n-grams.

    No actual encoder interface is defined in Tensor2Tensor, but this class
    contains the same functions as TextEncoder, ImageEncoder, and AudioEncoder.

    Args:
      steps_per_second: Number of steps per second at which to quantize. Also
          used to determine number of time shift events (up to one second).
      num_velocity_bins: Number of quantized velocity bins to use.
      min_pitch: Minimum MIDI pitch to encode.
      max_pitch: Maximum MIDI pitch to encode (inclusive).
      add_eos: Whether or not to add an EOS event to the end of encoded
          performances.
      ngrams: Optional list of performance event n-grams (tuples) to be
          represented by new indices. N-grams must have length at least 2 and
          should be pre-offset by the number of reserved IDs.

    Raises:
      ValueError: If any n-gram has length less than 2, or contains one of the
          reserved IDs.
    """
    self._steps_per_second = steps_per_second
    self._num_velocity_bins = num_velocity_bins
    self._add_eos = add_eos
    self._ngrams = ngrams or []

    for ngram in self._ngrams:
      if len(ngram) < 2:
        raise ValueError('All n-grams must have length at least 2.')
      if any(i < self.num_reserved_ids for i in ngram):
        raise ValueError('N-grams cannot contain reserved IDs.')

    self._encoding = note_seq.PerformanceOneHotEncoding(
        num_velocity_bins=num_velocity_bins,
        max_shift_steps=steps_per_second,
        min_pitch=min_pitch,
        max_pitch=max_pitch)

    # Create a trie mapping n-grams to new indices (requires the `pygtrie`
    # package, imported at module level).
    ngram_ids = range(self.unigram_vocab_size,
                      self.unigram_vocab_size + len(self._ngrams))
    self._ngrams_trie = pygtrie.Trie(zip(self._ngrams, ngram_ids))

    # Also add all unigrams to the trie.
    self._ngrams_trie.update(zip([(i,) for i in range(self.unigram_vocab_size)],
                                 range(self.unigram_vocab_size)))
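
The trie built above is what makes n-gram replacement efficient during encoding: at each position, a longest-prefix lookup matches the longest known n-gram and falls back to a unigram otherwise. Below is a toy sketch of that lookup pattern with pygtrie; the five-event vocabulary, the `encode` helper, and the greedy matching loop are illustrative assumptions, not the class's actual encode method:

import pygtrie

# Toy vocabulary: unigrams 0..4, plus one bigram (2, 3) mapped to index 5.
unigram_vocab_size = 5
trie = pygtrie.Trie({(2, 3): unigram_vocab_size})
trie.update(zip([(i,) for i in range(unigram_vocab_size)],
                range(unigram_vocab_size)))

def encode(events):
  # Greedily take the longest prefix known to the trie at each position.
  ids, pos = [], 0
  while pos < len(events):
    step = trie.longest_prefix(events[pos:])
    ids.append(step.value)
    pos += len(step.key)
  return ids

print(encode((1, 2, 3, 4)))  # [1, 5, 4]: the bigram (2, 3) collapses to 5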
Example #3
  def __init__(self,
               num_velocity_bins=0,
               max_tensors_per_notesequence=None,
               hop_size_bars=1,
               chunk_size_bars=1,
               steps_per_quarter=24,
               quarters_per_bar=4,
               min_num_instruments=2,
               max_num_instruments=8,
               min_total_events=8,
               max_events_per_instrument=64,
               min_pitch=performance_lib.MIN_MIDI_PITCH,
               max_pitch=performance_lib.MAX_MIDI_PITCH,
               first_subsequence_only=False,
               chord_encoding=None):
    max_shift_steps = (performance_lib.DEFAULT_MAX_SHIFT_QUARTERS *
                       steps_per_quarter)

    self._performance_encoding = note_seq.PerformanceOneHotEncoding(
        num_velocity_bins=num_velocity_bins,
        max_shift_steps=max_shift_steps,
        min_pitch=min_pitch,
        max_pitch=max_pitch)
    self._chord_encoding = chord_encoding

    self._num_velocity_bins = num_velocity_bins
    self._hop_size_bars = hop_size_bars
    self._chunk_size_bars = chunk_size_bars
    self._steps_per_quarter = steps_per_quarter
    self._steps_per_bar = steps_per_quarter * quarters_per_bar
    self._min_num_instruments = min_num_instruments
    self._max_num_instruments = max_num_instruments
    self._min_total_events = min_total_events
    self._max_events_per_instrument = max_events_per_instrument
    self._min_pitch = min_pitch
    self._max_pitch = max_pitch
    self._first_subsequence_only = first_subsequence_only

    self._max_num_chunks = hop_size_bars // chunk_size_bars
    self._max_steps_truncate = (
        steps_per_quarter * quarters_per_bar * hop_size_bars)

    # Each encoded track will begin with a program specification token
    # (with one extra program for drums).
    num_program_tokens = (
        note_seq.MAX_MIDI_PROGRAM - note_seq.MIN_MIDI_PROGRAM + 2)
    end_token = self._performance_encoding.num_classes + num_program_tokens
    depth = end_token + 1

    max_lengths = [
        self._max_num_chunks, max_num_instruments, max_events_per_instrument]
    if chord_encoding is None:
      control_depth = 0
      control_pad_token = None
    else:
      control_depth = chord_encoding.num_classes
      control_pad_token = chord_encoding.encode_event(note_seq.NO_CHORD)

    super(MultiInstrumentPerformanceConverter, self).__init__(
        input_depth=depth,
        input_dtype=bool,  # np.bool was removed in NumPy 1.24; builtin bool replaces it
        output_depth=depth,
        output_dtype=bool,
        control_depth=control_depth,
        control_dtype=bool,
        control_pad_token=control_pad_token,
        end_token=end_token,
        max_lengths=max_lengths,
        max_tensors_per_notesequence=max_tensors_per_notesequence)
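
The token-space arithmetic above (program tokens, end token, depth) can be sanity-checked by querying the encoding directly. A small sketch reproducing the computation for the default converter settings; the shift-quarters value 4 and the 0..127 pitch bounds are assumptions standing in for performance_lib's constants:

import note_seq

steps_per_quarter = 24
max_shift_steps = 4 * steps_per_quarter  # assumes DEFAULT_MAX_SHIFT_QUARTERS == 4

encoding = note_seq.PerformanceOneHotEncoding(
    num_velocity_bins=0,
    max_shift_steps=max_shift_steps,
    min_pitch=0,    # assumes MIN_MIDI_PITCH == 0
    max_pitch=127)  # assumes MAX_MIDI_PITCH == 127

# One token per MIDI program, plus one extra program for drums.
num_program_tokens = note_seq.MAX_MIDI_PROGRAM - note_seq.MIN_MIDI_PROGRAM + 2
end_token = encoding.num_classes + num_program_tokens
depth = end_token + 1
print(encoding.num_classes, num_program_tokens, end_token, depth)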
Example #4
  # Signature reconstructed from the attribute assignments below; the default
  # values are assumptions.
  def __init__(self, details, encoder_decoder, hparams,
               num_velocity_bins=0, control_signals=None,
               optional_conditioning=False, note_performance=False):
    super(PerformanceRnnConfig, self).__init__(
        details, encoder_decoder, hparams)
    self.num_velocity_bins = num_velocity_bins
    self.control_signals = control_signals
    self.optional_conditioning = optional_conditioning
    self.note_performance = note_performance


default_configs = {
    'performance':
        PerformanceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='performance', description='Performance RNN'),
            note_seq.OneHotEventSequenceEncoderDecoder(
                note_seq.PerformanceOneHotEncoding()),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[512, 512, 512],
                dropout_keep_prob=1.0,
                clip_norm=3,
                learning_rate=0.001)),
    'performance_with_dynamics':
        PerformanceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='performance_with_dynamics',
                description='Performance RNN with dynamics'),
            note_seq.OneHotEventSequenceEncoderDecoder(
                note_seq.PerformanceOneHotEncoding(num_velocity_bins=32)),
            contrib_training.HParams(
                batch_size=64,