def main(_):
    model_path = os.path.join('./model/', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    checkpoint_path = None
    if os.path.isdir(model_path):
        checkpoint_path = tf.train.latest_checkpoint(model_path)

    dataset = load_dataset()
    batch_gen = dataset.batches(FLAGS.num_seqs, FLAGS.num_steps, 10)
    print(EventSeq.dim())

    model = CharRNN(EventSeq.dim(),
                    ControlSeq.dim(),
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.sess.run(tf.global_variables_initializer())
    if checkpoint_path is not None:
        # resume from the most recent checkpoint if one exists
        model.load(checkpoint_path)
    model.train(batch_gen,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
def transposition(events, controls, offset=0):
    # events: [steps, batch_size] array of event indices
    # controls: [steps, batch_size, control_dim]
    # returns the transposed events and controls
    events = np.array(events, dtype=np.int64)
    controls = np.array(controls, dtype=np.float32)
    event_feat_ranges = EventSeq.feat_ranges()

    on = event_feat_ranges['note_on']
    off = event_feat_ranges['note_off']

    if offset > 0:
        # indices that can be shifted up without leaving their range
        indices0 = (((on.start <= events) & (events < on.stop - offset)) |
                    ((off.start <= events) & (events < off.stop - offset)))
        # indices that would overflow the range; wrap them down an octave
        indices1 = (((on.stop - offset <= events) & (events < on.stop)) |
                    ((off.stop - offset <= events) & (events < off.stop)))
        events[indices0] += offset
        events[indices1] += offset - 12
    elif offset < 0:
        # indices that can be shifted down without leaving their range
        indices0 = (((on.start - offset <= events) & (events < on.stop)) |
                    ((off.start - offset <= events) & (events < off.stop)))
        # indices that would underflow the range; wrap them up an octave
        indices1 = (((on.start <= events) & (events < on.start - offset)) |
                    ((off.start <= events) & (events < off.start - offset)))
        events[indices0] += offset
        events[indices1] += offset + 12

    assert ((0 <= events) & (events < EventSeq.dim())).all()

    # rotate the pitch histogram controls by the same offset
    histr = ControlSeq.feat_ranges()['pitch_histogram']
    controls[:, :, histr.start:histr.stop] = np.roll(
        controls[:, :, histr.start:histr.stop], offset, -1)

    return events, controls
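# Illustrative sketch (not in the original source): a tiny smoke test for
# transposition(). The array shapes and the +/-6 semitone offset range are
# assumptions; this helper is hypothetical and never called by the training code.
def _transposition_smoke_test(steps=8, batch_size=2):
    dummy_events = np.random.randint(0, EventSeq.dim(), size=[steps, batch_size])
    dummy_controls = np.random.random_sample(
        [steps, batch_size, ControlSeq.dim()]).astype(np.float32)
    offset = int(np.random.choice(np.arange(-6, 7)))  # shift by up to half an octave
    aug_events, aug_controls = transposition(dummy_events, dummy_controls, offset)
    assert aug_events.shape == (steps, batch_size)
    assert aug_controls.shape == dummy_controls.shape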
def event_indeces_to_midi_file(event_indeces, midi_file_name, velocity_scale=0.8):
    event_seq = EventSeq.from_array(event_indeces)
    note_seq = event_seq.to_note_seq()
    for note in note_seq.notes:
        # compress velocities towards the centre value 64 before writing the file
        note.velocity = int((note.velocity - 64) * velocity_scale + 64)
    note_seq.to_midi_file(midi_file_name)
    return len(note_seq.notes)
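# Worked example (added for illustration): with the default velocity_scale of 0.8,
# velocities are pulled towards 64, e.g.
#   int((100 - 64) * 0.8 + 64) == 92
#   int((30  - 64) * 0.8 + 64) == 36
# so loud notes come out slightly softer and soft notes slightly louder.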
def preprocess_midi(path):
    note_seq = NoteSeq.from_drum_midi_file(path)  # read the drum notes
    note_seq.adjust_time(-note_seq.notes[0].start)  # shift so the first note starts at time 0
    event_seq = EventSeq.from_note_seq(note_seq)
    control_seq = ControlSeq.from_event_seq(event_seq)
    return event_seq.to_array(), control_seq.to_compressed_array()
def main(_):
    max_len = FLAGS.max_length
    if os.path.isfile(FLAGS.control) or os.path.isdir(FLAGS.control):
        control_path = FLAGS.control
        if os.path.isdir(FLAGS.control):
            # pick a random preprocessed file from the directory
            files = list(utils.find_files_by_extensions(FLAGS.control))
            assert len(files) > 0, 'no file in "{control}"'.format(
                control=FLAGS.control)
            control_path = np.random.choice(files)
        events, compressed_controls = torch.load(control_path)
        controls = ControlSeq.recover_compressed_array(compressed_controls)
        if FLAGS.max_length == 0:
            max_len = controls.shape[0]
        controls = np.expand_dims(controls, 1).repeat(1, 1)  # add a batch axis
        control = 'control sequence from "{path}"'.format(path=control_path)
    assert max_len > 0, 'either max length or control sequence length should be given'

    # FLAGS.start_string = FLAGS.start_string.decode('utf-8')
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)

    model = CharRNN(EventSeq.dim(),
                    ControlSeq.dim(),
                    sampling=True,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.sess.run(tf.global_variables_initializer())
    model.load(FLAGS.checkpoint_path)

    # prime the sampler with the first 100 events of the control sequence
    outputs = model.sample(1000, prime=events[0:100], vocab_size=EventSeq.dim())
    outputs = outputs.reshape([-1, 1])
    print(outputs)

    name = 'output-{i:03d}.mid'.format(i=0)
    path = os.path.join('output/', name)
    n_notes = utils.event_indeces_to_midi_file(outputs[:, 0], path)
    print('===> {path} ({n_notes} notes)'.format(path=path, n_notes=n_notes))
def __init__(self, event_dim=EventSeq.dim(),
             hidden_dim=512, gru_layers=3, gru_dropout=0.3):
    super().__init__()
    self.event_embedding = nn.Embedding(event_dim, hidden_dim)
    self.gru = nn.GRU(hidden_dim, hidden_dim,
                      num_layers=gru_layers, dropout=gru_dropout)
    self.attn = nn.Parameter(torch.randn(hidden_dim), requires_grad=True)
    self.output_fc = nn.Linear(hidden_dim, 1)
    self.output_fc_activation = nn.Sigmoid()
def preprocess_midi(path):
    note_seq = NoteSeq.from_midi_file(path)
    note_seq.adjust_time(-note_seq.notes[0].start)  # shift so the first note starts at time 0
    event_seq = EventSeq.from_note_seq(note_seq)
    control_seq = ControlSeq.from_event_seq(event_seq)
    return event_seq.to_array(), control_seq.to_compressed_array()
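# Usage sketch (illustrative, not from the original file): the generation script
# above loads a preprocessed file with torch.load(), so one way to produce such a
# file is to save the two arrays returned by preprocess_midi() as a tuple. The
# helper name and file names here are placeholders.
import torch

def _preprocess_and_save(midi_path='example.mid', out_path='example.data'):
    event_array, compressed_control_array = preprocess_midi(midi_path)
    torch.save((event_array, compressed_control_array), out_path)
    return out_path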
sess_path = options.sess_path
data_path = options.data_path
saving_interval = options.saving_interval

learning_rate = options.learning_rate
batch_size = options.batch_size
window_size = options.window_size
stride_size = options.stride_size
use_transposition = options.use_transposition
control_ratio = options.control_ratio
teacher_forcing_ratio = options.teacher_forcing_ratio
reset_optimizer = options.reset_optimizer
enable_logging = options.enable_logging

event_dim = EventSeq.dim()
control_dim = ControlSeq.dim()
model_config = config.model
model_params = utils.params2dict(options.model_params)
model_config.update(model_params)
device = config.device

print('-' * 70)
print('Session path:', sess_path)
print('Dataset path:', data_path)
print('Saving interval:', saving_interval)
print('-' * 70)
print('Hyperparameters:', utils.dict2params(model_config))
print('Learning rate:', learning_rate)
from sequence import EventSeq, NoteSeq
import numpy as np

# quick smoke test: decode 2048 random event indices into a MIDI file
rand_array = np.random.random_sample([2048])
rand_array = rand_array * 240  # scale the floats up into the event-index range
rand_array = rand_array.astype(np.int64)  # np.int was removed in NumPy 1.24
print(rand_array)

es = EventSeq.from_array(rand_array)
es.to_note_seq().to_midi_file('out.midi')
import torch
# import sys, os
# print(os.path.dirname(os.path.abspath('__file__')))
# sys.path.append('/data2/qt/MusicGeneration/mg/model')
# print(os.path.dirname(os.path.abspath('__file__')))
from sequence import EventSeq

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

event_dim = EventSeq.dim()
pad_token = EventSeq.dim()
# token_sos = event_dim + 1
# token_eos = event_dim + 2
# vocab_size = event_dim + 3
vocab_size = EventSeq.dim() + 1

save_path = '/data2/qt/MusicGeneration/mg/model/MusicTransformer/output/'
condition_file = None  # '/data2/qt/MusicGeneration/egs/dataset/maestro/train/MIDI-Unprocessed_Recital1-3_MID--AUDIO_03_R1_2018_wav--1.midi'
length = 2000
threshold_len = 500
pickle_dir = '/data2/qt/MusicGeneration/egs/dataset/maestro/'
load_path = None

dropout = 0.2
debug = False
num_layers = 6
max_seq = 2048
embedding_dim = 256
import torch
from torch import nn

from progress.bar import Bar

import config, utils
from config import device
from data import Dataset
from model import PerformanceRNN
from sequence import EventSeq, ControlSeq

# pylint: disable=E1101

# ========================================================================
# Discriminator
# ========================================================================

discriminator_config = {
    'event_dim': EventSeq.dim(),
    'hidden_dim': 512,
    'gru_layers': 3,
    'gru_dropout': 0.3,
}


class EventSequenceEncoder(nn.Module):
    def __init__(self, event_dim=EventSeq.dim(),
                 hidden_dim=512, gru_layers=3, gru_dropout=0.3):
        super().__init__()
        self.event_embedding = nn.Embedding(event_dim, hidden_dim)
        self.gru = nn.GRU(hidden_dim, hidden_dim,
                          num_layers=gru_layers, dropout=gru_dropout)
    cnt_dict['index-' + str(index)] = 1
    return cnt_arr


print(par.vocab_size)
data = Data('dataset/processed')
# ds = DataSequence('dataset/processed', 10, 2048)
sample = data.seq2seq_batch(1000, 100)[0]
pprint.pprint(list(sample))

arr = count_dict(par.vocab_size + 3, sample)
pprint.pprint(arr)

from sequence import EventSeq, Event

event_cnt = {'note_on': 0, 'note_off': 0, 'velocity': 0, 'time_shift': 0}
for event_index in range(len(arr)):
    for event_type, feat_range in EventSeq.feat_ranges().items():
        if feat_range.start <= event_index < feat_range.stop:
            print(event_type + ':' + str(arr[event_index]) +
                  ' event cnt: ' + str(event_cnt))
            event_cnt[event_type] += arr[event_index]
            # event_value = event_index - feat_range.start
            # events.append(Event(event_type, time, event_value))
            # if event_type == 'time_shift':
            #     time += EventSeq.time_shift_bins[event_value]
            # break

print(event_cnt)
# print(np.max(sample), np.min(sample))
# print([data._get_seq(file).shape for file in data.files])