def train_and_evaluate_model(model, X_tr, y_tr, X_cv, y_cv):
    # X_tr/X_cv arrive as 2-tuples; unpack into inputs for the generator
    xtr, mtr = X_tr
    xcv, mcv = X_cv
    hist = model.fit_generator(
        get_data_generator(datagen, xtr, mtr, y_tr, batch_size=batch_size),
        steps_per_epoch=np.ceil(float(len(xtr)) / float(batch_size)),
        epochs=epochs,
        verbose=2,
        validation_data=get_data_generator(datagen, xcv, mcv, y_cv,
                                           batch_size=batch_size),
        validation_steps=np.ceil(float(len(xcv)) / float(batch_size)),
        callbacks=get_callbacks())

    best_epoch, loss, acc, val_loss, val_acc = get_best_history(
        hist.history, monitor='val_loss', mode='min')
    print()
    print("Best epoch: {}".format(best_epoch))
    print("loss: {:0.6f} - acc: {:0.4f} - val_loss: {:0.6f} - val_acc: {:0.4f}"
          .format(loss, acc, val_loss, val_acc))
    print()
    return val_loss
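# get_best_history is called above but not shown. A minimal sketch of what it
# presumably does: pick the epoch that optimizes the monitored metric and
# return that epoch's metrics. The exact return order is inferred from the
# unpacking above; treat this as an assumption, not the project's actual code.
import numpy as np

def get_best_history(history, monitor='val_loss', mode='min'):
    values = history[monitor]
    best = int(np.argmin(values)) if mode == 'min' else int(np.argmax(values))
    return (best + 1,  # epochs are 1-indexed in Keras logs
            history['loss'][best],
            history['acc'][best],
            history['val_loss'][best],
            history['val_acc'][best])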
def main():
    args = parse_args()
    args.verbose = True

    # prime file validation
    if args.prime_file and not os.path.exists(args.prime_file):
        utils.log('Error: prime file {} does not exist. Exiting.'.format(
            args.prime_file), True)
        exit(1)
    else:
        if not os.path.isdir(args.data_dir):
            utils.log('Error: data dir {} does not exist. Exiting.'.format(
                args.data_dir), True)
            exit(1)

    midi_files = [args.prime_file] if args.prime_file else \
        [os.path.join(args.data_dir, f) for f in os.listdir(args.data_dir)
         if '.mid' in f or '.midi' in f]

    experiment_dir = get_experiment_dir(args.experiment_dir)
    utils.log('Using {} as --experiment_dir'.format(experiment_dir),
              args.verbose)

    if not args.save_dir:
        args.save_dir = os.path.join(experiment_dir, 'generated')

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
        utils.log('Created directory {}'.format(args.save_dir), args.verbose)

    if not args.from_checkpoint:
        model, epoch = train.get_model(args, experiment_dir=experiment_dir)
        utils.log('Model loaded from {}'.format(
            os.path.join(experiment_dir, 'model.json')), args.verbose)
    else:
        # load the architecture, then restore weights from the requested checkpoint
        with open(os.path.join(experiment_dir, 'model.json'), 'r') as f:
            model = utils.model_from_json(f.read())
        epoch = int(args.from_checkpoint)
        newest_checkpoint = os.path.join(
            experiment_dir,
            f"checkpoints/checkpoint-epoch_{args.from_checkpoint}.hdf5")
        utils.load_checkpoint(model, newest_checkpoint)
        utils.log('Model loaded from checkpoint {}'.format(newest_checkpoint),
                  args.verbose)

    window_size = model.layers[0].get_input_shape_at(0)[1]
    seed_generator = utils.get_data_generator(
        midi_files,
        window_size=window_size,
        batch_size=32,
        num_threads=1,
        use_instrument=args.use_instrument,
        ignore_empty=args.ignore_empty,
        encode_section=args.encode_section,
        max_files_in_ram=10)

    # validate midi instrument name
    try:
        # try to parse the instrument name as an int
        instrument_num = int(args.midi_instrument)
        if not 0 <= instrument_num <= 127:
            utils.log('Error: {} is not a supported instrument. Numeric '
                      'values must be 0-127. Exiting.'.format(
                          args.midi_instrument), True)
            exit(1)
        args.midi_instrument = pretty_midi.program_to_instrument_name(
            instrument_num)
    except ValueError:
        # the instrument name is a string; validate that it can be
        # converted to a program number
        try:
            _ = pretty_midi.instrument_name_to_program(args.midi_instrument)
        except ValueError:
            utils.log('Error: {} is not a valid General MIDI instrument. '
                      'Exiting.'.format(args.midi_instrument), True)
            exit(1)

    if args.multi_instruments:
        if not args.prime_file:
            utils.log('Error: You need to specify a prime file when '
                      'generating a multi-instrument track. Exiting.', True)
            exit(1)

        utils.log(f"Sampling from single seed file: {args.prime_file}",
                  args.verbose)
        generated_midi = pretty_midi.PrettyMIDI(initial_tempo=80)
        source_midi = utils.parse_midi(args.prime_file)
        melody_instruments = source_midi.instruments
        # melody_instruments = utils.filter_monophonic(source_midi.instruments, 1.0)

        for instrument in melody_instruments:
            instrument_group = utils.get_family_id_by_instrument_normalized(
                instrument.program)

            # collect seed windows from the source track
            X, y = [], []
            windows = utils._encode_sliding_windows(instrument, window_size)
            for w in windows:
                if np.min(w[0][:, 0]) == 1:
                    # window only contains pauses; ignore it
                    continue
                X.append(w[0])
            if len(X) <= 5:
                continue
            seed = X[random.randint(0, len(X) - 1)]

            # generate a track for this instrument
            generated = []
            buf = np.copy(seed).tolist()
            while len(generated) < args.file_length:
                buf_expanded = [x for x in buf]

                # prepend the instrument class to each input step
                if args.use_instrument:
                    buf_expanded = [[instrument_group] + x
                                    for x in buf_expanded]

                # prepend the section encoding to each input step
                if args.encode_section:
                    sections = [0] * 4
                    active_section = int(
                        (len(generated) / args.file_length) * 4)
                    sections[active_section] = 1
                    buf_expanded = [sections + x for x in buf_expanded]

                # predict the next event and sample from the
                # probability distribution
                arr = np.expand_dims(np.asarray(buf_expanded), 0)
                pred = model.predict(arr)
                index = np.random.choice(range(0, seed.shape[1]), p=pred[0])
                pred = np.zeros(seed.shape[1])
                pred[index] = 1
                generated.append(pred)
                buf.pop(0)
                buf.append(pred.tolist())

            # decode the generated events into an instrument track
            instrument = utils._network_output_to_instrument(
                generated, instrument.program)

            # add to the target midi
            generated_midi.instruments.append(instrument)

        if len(generated_midi.instruments) == 0:
            raise Exception(
                f"Found no monophonic instruments in {args.prime_file}")

        # save the midi file (avoid shadowing the time module)
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        sample_name = f"{args.save_dir}/sampled_{timestamp}.mid"
        print(f"Writing generated sample to {sample_name}")
        generated_midi.write(sample_name)
    else:
        # generate args.num_files tracks using random seeds
        utils.log('Loading seed files...', args.verbose)
        X, y = next(seed_generator)
        generated = utils.generate(model, X, window_size,
                                   args.file_length, args.num_files,
                                   args.midi_instrument,
                                   use_instrument=args.use_instrument,
                                   encode_section=args.encode_section)
        for i, midi in enumerate(generated):
            file = os.path.join(
                args.save_dir,
                f"{i + 1}_instrument{midi.instruments[0].program}.mid")
            midi.write(file)
            utils.log('wrote midi file to {}'.format(file), True)
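# utils._encode_sliding_windows is consumed above but not shown. A minimal
# sketch of the contract the loop relies on, assuming the track has already
# been encoded as a one-hot event matrix with column 0 reserved for pauses
# (the name and encoding here are assumptions, not the project's actual code):
import numpy as np

def encode_sliding_windows(sequence, window_size):
    # slide a fixed-size window over a (num_events, vocab) one-hot sequence,
    # yielding (window, next_event) pairs like those indexed as w[0] above
    sequence = np.asarray(sequence)
    for i in range(len(sequence) - window_size):
        yield sequence[i:i + window_size], sequence[i + window_size]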
import os
import utils
from midi2audio import FluidSynth

midi_files = [os.path.join("vivaldi", path) for path in os.listdir("vivaldi")
              if '.mid' in path or '.midi' in path]

# generate tracks using random seeds
print('enter seed (1-50)')
seed = int(input())

seed_generator = utils.get_data_generator(midi_files,
                                          window_size=50,
                                          batch_size=1,
                                          num_threads=1,
                                          max_files_in_ram=10)

# defaults, overridden by the prompts below
window = 50   # length of window
length = 100  # number of events
number = 10   # number of samples
instrument = 'Acoustic Grand Piano'  # full list: https://www.midi.org/specifications/item/gm-level-1-sound-set

print('enter window size')
window = int(input())
print('enter length of sample')
length = int(input())
print('enter number of samples')
number = int(input())
print('enter instrument (for example Acoustic Grand Piano)')
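# FluidSynth is imported above but the snippet ends before any rendering.
# Presumably a generated MIDI file is rendered to audio along these lines
# (the file names are illustrative placeholders; midi2audio needs a SoundFont
# such as FluidR3_GM installed):
fs = FluidSynth()  # or FluidSynth(sound_font='/path/to/soundfont.sf2')
fs.midi_to_audio('sample.mid', 'sample.wav')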
def main():
    args = parse_args()
    args.verbose = True

    # prime file validation
    if args.prime_file and not os.path.exists(args.prime_file):
        utils.log('Error: prime file {} does not exist. Exiting.'.format(
            args.prime_file), True)
        exit(1)
    else:
        if not os.path.isdir(args.data_dir):
            utils.log('Error: data dir {} does not exist. Exiting.'.format(
                args.data_dir), True)
            exit(1)

    midi_files = [args.prime_file] if args.prime_file else \
        [os.path.join(args.data_dir, f) for f in os.listdir(args.data_dir)
         if '.mid' in f or '.midi' in f]

    experiment_dir = get_experiment_dir(args.experiment_dir)
    utils.log('Using {} as --experiment_dir'.format(experiment_dir),
              args.verbose)

    if not args.save_dir:
        args.save_dir = os.path.join(experiment_dir, 'generated')

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
        utils.log('Created directory {}'.format(args.save_dir), args.verbose)

    model, epoch = train.get_model(args, experiment_dir=experiment_dir)
    utils.log('Model loaded from {}'.format(
        os.path.join(experiment_dir, 'model.json')), args.verbose)

    window_size = model.layers[0].get_input_shape_at(0)[1]
    seed_generator = utils.get_data_generator(midi_files,
                                              window_size=window_size,
                                              batch_size=32,
                                              num_threads=1,
                                              max_files_in_ram=10)

    # validate midi instrument name
    try:
        # try to parse the instrument name as an int
        instrument_num = int(args.midi_instrument)
        if not 0 <= instrument_num <= 127:
            utils.log('Error: {} is not a supported instrument. Numeric '
                      'values must be 0-127. Exiting.'.format(
                          args.midi_instrument), True)
            exit(1)
        args.midi_instrument = pretty_midi.program_to_instrument_name(
            instrument_num)
    except ValueError:
        # the instrument name is a string; validate that it can be
        # converted to a program number
        try:
            _ = pretty_midi.instrument_name_to_program(args.midi_instrument)
        except ValueError:
            utils.log('Error: {} is not a valid General MIDI instrument. '
                      'Exiting.'.format(args.midi_instrument), True)
            exit(1)

    # generate args.num_files tracks using random seeds
    utils.log('Loading seed files...', args.verbose)
    X, y = next(seed_generator)
    generated = utils.generate(model, X, window_size, args.file_length,
                               args.num_files, args.midi_instrument)

    for i, midi in enumerate(generated):
        file = os.path.join(args.save_dir, '{}.mid'.format(i + 1))
        midi.write(file)
        utils.log('wrote midi file to {}'.format(file), True)
def main():
    args = parse_args()
    args.verbose = True

    try:
        # get paths to midi files in --data_dir
        midi_files = [os.path.join(args.data_dir, path)
                      for path in os.listdir(args.data_dir)
                      if '.mid' in path or '.midi' in path]
    except OSError as e:
        utils.log('Error: Invalid --data_dir, {} directory does not exist. '
                  'Exiting.'.format(args.data_dir), args.verbose)
        exit(1)

    utils.log('Found {} midi files in {}'.format(len(midi_files),
                                                 args.data_dir), args.verbose)

    if len(midi_files) < 1:
        utils.log('Error: no midi files found in {}. Exiting.'.format(
            args.data_dir), args.verbose)
        exit(1)

    # create the experiment directory and return its name
    experiment_dir = utils.create_experiment_dir(args.experiment_dir,
                                                 args.verbose)

    # write --message to experiment_dir
    if args.message:
        with open(os.path.join(experiment_dir, 'message.txt'), 'w') as f:
            f.write(args.message)
            utils.log('Wrote {} bytes to {}'.format(
                len(args.message),
                os.path.join(experiment_dir, 'message.txt')), args.verbose)

    val_split = 0.2  # use 20 percent for validation
    val_split_index = int(float(len(midi_files)) * val_split)

    # use generators to lazy load train/validation data, ensuring that the
    # user doesn't have to load all midi files into RAM at once; the first
    # val_split of the files is held out for validation
    train_generator = utils.get_data_generator(
        midi_files[val_split_index:],
        window_size=args.window_size,
        batch_size=args.batch_size,
        num_threads=args.n_jobs,
        max_files_in_ram=args.max_files_in_ram)

    val_generator = utils.get_data_generator(
        midi_files[0:val_split_index],
        window_size=args.window_size,
        batch_size=args.batch_size,
        num_threads=args.n_jobs,
        max_files_in_ram=args.max_files_in_ram)

    model, epoch = get_model(args)
    if args.verbose:
        print(model.summary())

    utils.save_model(model, experiment_dir)
    utils.log('Saved model to {}'.format(
        os.path.join(experiment_dir, 'model.json')), args.verbose)

    callbacks = get_callbacks(experiment_dir)

    print('fitting model...')

    # this is a somewhat magic number: the average number of length-20 windows
    # per file, calculated from ~5K MIDI files in the Lakh MIDI Dataset
    magic_number = 827
    start_time = time.time()
    model.fit_generator(
        train_generator,
        steps_per_epoch=len(midi_files) * magic_number / args.batch_size,
        epochs=args.num_epochs,
        validation_data=val_generator,
        validation_steps=len(midi_files) * 0.2 * magic_number / args.batch_size,
        verbose=args.verbosity,
        callbacks=callbacks,
        initial_epoch=epoch)
    utils.log('Finished in {:.2f} seconds'.format(time.time() - start_time),
              args.verbose)
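# A quick sanity check on the steps_per_epoch arithmetic above; the file
# count here is an illustrative assumption, not a value from the project.
num_files, magic_number, batch_size = 5000, 827, 32
print(num_files * magic_number / batch_size)  # 129218.75 batches per epoch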
midi_files = [os.path.join("data1", path) for path in os.listdir("data1") \ if '.mid' in path or '.midi' in path] print(len(midi_files)) experiment_dir = utils.create_experiment_dir('experiment_dir4', 1) val_split = 0.2 val_split_index = int(float(len(midi_files)) * val_split) train_generator = utils.get_data_generator(midi_files[0:val_split_index]) val_generator = utils.get_data_generator(midi_files[val_split_index:]) callbacks = utils.get_callbacks(experiment_dir) batch_size = 60 start_time = time.time() num_epochs = 10 model.fit_generator(train_generator, steps_per_epoch=len(midi_files) * 600 / batch_size, epochs=num_epochs, validation_data=val_generator, validation_steps=0.2 * len(midi_files) * 600 / batch_size, verbose=1,
def main():
    args = parse_args()
    args.verbose = True

    # create the experiment directory and return its name
    experiment_dir = utils.create_experiment_dir(args.experiment_dir,
                                                 args.verbose)

    with open(os.path.join(experiment_dir, 'arguments.json'), 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    val_split = 0.3  # use 30 percent for validation
    num_tracks = 0

    if args.pickle_file is not None:
        if not os.path.exists(args.pickle_file):
            utils.log('Error: pickle file {} does not exist. Exiting.'.format(
                args.pickle_file), True)
            exit(1)

        with open(args.pickle_file, 'rb') as file:
            tracks = pickle.load(file)
        random.shuffle(tracks)  # tracks are independent, so shuffle them
        num_tracks = len(tracks)
        val_split_index = int(float(num_tracks) * val_split)

        train_generator = utils.get_prepared_data_generator(
            tracks[val_split_index:],
            window_size=args.window_size,
            batch_size=args.batch_size,
            use_instrument=args.use_instrument,
            ignore_empty=args.ignore_empty,
            encode_section=args.encode_section,
            max_tracks_in_ram=args.max_files_in_ram)

        val_generator = utils.get_prepared_data_generator(
            tracks[0:val_split_index],
            window_size=args.window_size,
            batch_size=args.batch_size,
            use_instrument=args.use_instrument,
            ignore_empty=args.ignore_empty,
            encode_section=args.encode_section,
            max_tracks_in_ram=args.max_files_in_ram,
            shuffle_batches=True)
    else:
        try:
            # get paths to midi files in --data_dir
            midi_files = [os.path.join(args.data_dir, path)
                          for path in os.listdir(args.data_dir)
                          if '.mid' in path or '.midi' in path]
        except OSError as e:
            utils.log('Error: Invalid --data_dir, {} directory does not '
                      'exist. Exiting.'.format(args.data_dir), args.verbose)
            exit(1)

        utils.log('Found {} midi files in {}'.format(len(midi_files),
                                                     args.data_dir),
                  args.verbose)

        if len(midi_files) < 1:
            utils.log('Error: no midi files found in {}. Exiting.'.format(
                args.data_dir), args.verbose)
            exit(1)

        num_tracks = len(midi_files)
        val_split_index = int(float(num_tracks) * val_split)

        # use generators to lazy load train/validation data, ensuring that
        # the user doesn't have to load all midi files into RAM at once;
        # the first val_split of the files is held out for validation
        train_generator = utils.get_data_generator(
            midi_files[val_split_index:],
            window_size=args.window_size,
            batch_size=args.batch_size,
            num_threads=args.n_jobs,
            use_instrument=args.use_instrument,
            ignore_empty=args.ignore_empty,
            encode_section=args.encode_section,
            max_files_in_ram=args.max_files_in_ram)

        val_generator = utils.get_data_generator(
            midi_files[0:val_split_index],
            window_size=args.window_size,
            batch_size=args.batch_size,
            num_threads=args.n_jobs,
            use_instrument=args.use_instrument,
            ignore_empty=args.ignore_empty,
            encode_section=args.encode_section,
            max_files_in_ram=args.max_files_in_ram)

    # load model
    model, epoch = get_model(args)
    if args.verbose:
        print(model.summary())

    utils.save_model(model, experiment_dir)
    utils.log('Saved model to {}'.format(
        os.path.join(experiment_dir, 'model.json')), args.verbose)

    callbacks = get_callbacks(experiment_dir)

    print('fitting model...')

    magic_number = 500
    start_time = time.time()
    model.fit_generator(
        train_generator,
        steps_per_epoch=num_tracks * magic_number / args.batch_size,
        epochs=args.num_epochs,
        validation_data=val_generator,
        validation_steps=num_tracks * .1 * magic_number / args.batch_size,
        verbose=1,
        callbacks=callbacks,
        initial_epoch=epoch)
    utils.log('Finished in {:.2f} seconds'.format(time.time() - start_time),
              args.verbose)
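# The --pickle_file path above is assumed to hold a list of pre-parsed tracks.
# A hypothetical preparation step, assuming the generators consume
# pretty_midi Instrument objects (names and format are assumptions):
import pickle
import pretty_midi

def prepare_pickle(midi_files, out_path='tracks.pkl'):
    # parse each MIDI file once and pickle the per-instrument tracks so
    # training can skip repeated MIDI parsing
    tracks = []
    for path in midi_files:
        midi = pretty_midi.PrettyMIDI(path)
        tracks.extend(midi.instruments)
    with open(out_path, 'wb') as f:
        pickle.dump(tracks, f)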
train_encoder_batch = get_batch_generator(train_encoder_tokens,
                                          train_max_word_len,
                                          input_oh_encoder, BATCH_SIZE,
                                          False)
train_decoder_batch = get_batch_generator(train_decoder_tokens,
                                          train_max_word_len,
                                          target_oh_encoder, BATCH_SIZE)
train_target_batch = get_batch_generator(train_target_tokens,
                                         train_max_word_len,
                                         target_oh_encoder, BATCH_SIZE)

val_encoder_batch = get_batch_generator(val_encoder_tokens, val_max_word_len,
                                        input_oh_encoder, BATCH_SIZE, False)
val_decoder_batch = get_batch_generator(val_decoder_tokens, val_max_word_len,
                                        target_oh_encoder, BATCH_SIZE)
val_target_batch = get_batch_generator(val_target_tokens, val_max_word_len,
                                       target_oh_encoder, BATCH_SIZE)

train_loader = get_data_generator(train_encoder_batch, train_decoder_batch,
                                  train_target_batch)
val_loader = get_data_generator(val_encoder_batch, val_decoder_batch,
                                val_target_batch)

model.fit(train_loader,
          steps_per_epoch=train_steps,
          epochs=EPOCHS,
          verbose=1,
          validation_data=val_loader,
          validation_steps=val_steps,
          callbacks=[tensorboard_callback])

model.save(f'{model_name}.h5')
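# get_data_generator here presumably just zips the three batch streams into
# the (inputs, targets) tuples that model.fit expects for an encoder-decoder
# model. A minimal sketch under that assumption:
def get_data_generator(encoder_batches, decoder_batches, target_batches):
    # yield ([encoder_input, decoder_input], target) tuples indefinitely
    while True:
        yield ([next(encoder_batches), next(decoder_batches)],
               next(target_batches))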
def main():
    args = parse_args()
    args.verbose = True

    try:
        # get paths to midi files in --data_dir
        midi_files = [os.path.join(args.data_dir, path)
                      for path in os.listdir(args.data_dir)
                      if '.mid' in path or '.midi' in path]
    except OSError as e:
        utils.log('Error: Invalid --data_dir, {} directory does not exist. '
                  'Exiting.'.format(args.data_dir), args.verbose)
        exit(1)

    utils.log('Found {} midi files in {}'.format(len(midi_files),
                                                 args.data_dir), args.verbose)

    if len(midi_files) < 1:
        utils.log('Error: no midi files found in {}. Exiting.'.format(
            args.data_dir), args.verbose)
        exit(1)

    # create the experiment directory and return its name
    experiment_dir = utils.create_experiment_dir(args.experiment_dir,
                                                 args.verbose)

    # write --message to experiment_dir
    if args.message:
        with open(os.path.join(experiment_dir, 'message.txt'), 'w') as f:
            f.write(args.message)
            utils.log('Wrote {} bytes to {}'.format(
                len(args.message),
                os.path.join(experiment_dir, 'message.txt')), args.verbose)

    val_split = 0.2  # use 20 percent for validation
    val_split_index = int(float(len(midi_files)) * val_split)

    # use generators to lazy load train/validation data, ensuring that the
    # user doesn't have to load all midi files into RAM at once; the first
    # 20 percent of the files is held out for validation
    train_generator = utils.get_data_generator(
        midi_files[val_split_index:],
        window_size=args.window_size,
        batch_size=args.batch_size,
        num_threads=args.n_jobs,
        max_files_in_ram=args.max_files_in_ram)

    val_generator = utils.get_data_generator(
        midi_files[0:val_split_index],
        window_size=args.window_size,
        batch_size=args.batch_size,
        num_threads=args.n_jobs,
        max_files_in_ram=args.max_files_in_ram)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    model, epoch = get_model(args)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)

    if args.verbose:
        print(model)

    utils.save_model(model, experiment_dir)
    utils.log('Saved model to {}'.format(
        os.path.join(experiment_dir, 'model.pth')), args.verbose)

    criterion = nn.NLLLoss()
    optimizer = get_optimizer(args, model)

    print('fitting model...')
    start_time = time.time()

    train_losses, val_losses = [], []
    for e in range(args.num_epochs):
        print('Epoch', e + 1)
        running_loss = 0
        len_train_generator = 0
        len_val_generator = 0

        # training pass
        for x, y in train_generator:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            log_ps = model(x.float())
            loss = criterion(log_ps, y.long())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            len_train_generator += 1

        # validation pass
        val_loss = 0
        accuracy = 0
        with torch.no_grad():
            model.eval()
            for x, y in val_generator:
                x, y = x.to(device), y.to(device)
                log_ps = model(x.float())
                val_loss += criterion(log_ps, y.long())
                # convert log probabilities to probabilities
                ps = torch.exp(log_ps)
                # select the most likely class
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == y.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor))
                len_val_generator += 1

        # set the model back to train mode after the eval pass
        model.train()

        train_losses.append(running_loss / len_train_generator)
        val_losses.append(val_loss / len_val_generator)
        print("\nEpoch: {}/{}..".format(e + 1, args.num_epochs),
              "Training Loss: {:.3f}".format(running_loss / len_train_generator),
              "Validation Loss: {:.3f}".format(val_loss / len_val_generator),
              "Validation Accuracy: {:.3f}".format(accuracy / len_val_generator))
        utils.save_model(model, experiment_dir)

        # Model Checkpoint
        # if val_losses[-1] < best_val_loss:
        #     print('Validation loss improved from {:.3f} to {:.3f}, '
        #           'saving the model.'.format(best_val_loss, val_losses[-1]))
        #     best_val_loss = val_losses[-1]
        #     checkpoint = {'model': model,
        #                   'idx_to_class': idx_to_class}
        #     torch.save(checkpoint, 'checkpoint.pth')

    utils.log('Finished in {:.2f} seconds'.format(time.time() - start_time),
              args.verbose)