def main():
    args = parse_args()
    args.verbose = True

    try:
        # get paths to midi files in --data_dir
        midi_files = [os.path.join(args.data_dir, path)
                      for path in os.listdir(args.data_dir)
                      if '.mid' in path or '.midi' in path]
    except OSError as e:
        utils.log(
            'Error: Invalid --data_dir, {} directory does not exist. Exiting.'.format(args.data_dir),
            args.verbose)
        exit(1)

    utils.log(
        'Found {} midi files in {}'.format(len(midi_files), args.data_dir),
        args.verbose)

    if len(midi_files) < 1:
        utils.log(
            'Error: no midi files found in {}. Exiting.'.format(args.data_dir),
            args.verbose)
        exit(1)

    # create the experiment directory and return its name
    experiment_dir = utils.create_experiment_dir(args.experiment_dir, args.verbose)

    # write --message to experiment_dir
    if args.message:
        with open(os.path.join(experiment_dir, 'message.txt'), 'w') as f:
            f.write(args.message)
            utils.log(
                'Wrote {} bytes to {}'.format(
                    len(args.message),
                    os.path.join(experiment_dir, 'message.txt')),
                args.verbose)

    val_split = 0.2  # use 20 percent for validation
    val_split_index = int(float(len(midi_files)) * val_split)

    # use generators to lazy load train/validation data, ensuring that the
    # user doesn't have to load all midi files into RAM at once
    train_generator = utils.get_data_generator(
        midi_files[val_split_index:],
        window_size=args.window_size,
        batch_size=args.batch_size,
        num_threads=args.n_jobs,
        max_files_in_ram=args.max_files_in_ram)

    val_generator = utils.get_data_generator(
        midi_files[0:val_split_index],
        window_size=args.window_size,
        batch_size=args.batch_size,
        num_threads=args.n_jobs,
        max_files_in_ram=args.max_files_in_ram)

    model, epoch = get_model(args)
    if args.verbose:
        print(model.summary())

    utils.save_model(model, experiment_dir)
    utils.log(
        'Saved model to {}'.format(os.path.join(experiment_dir, 'model.json')),
        args.verbose)

    callbacks = get_callbacks(experiment_dir)

    print('fitting model...')

    # this is a somewhat magic number which is the average number of length-20 windows
    # calculated from ~5K MIDI files from the Lakh MIDI Dataset.
    magic_number = 827
    start_time = time.time()
    model.fit_generator(
        train_generator,
        steps_per_epoch=len(midi_files) * magic_number / args.batch_size,
        epochs=args.num_epochs,
        validation_data=val_generator,
        validation_steps=len(midi_files) * 0.2 * magic_number / args.batch_size,
        verbose=args.verbosity,
        callbacks=callbacks,
        initial_epoch=epoch)

    utils.log('Finished in {:.2f} seconds'.format(time.time() - start_time),
              args.verbose)
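# The script above relies on a utils.get_data_generator helper that is not shown here.
# It is presumably an infinite generator of (window, target) batches, which is why
# steps_per_epoch has to be estimated with the magic_number heuristic. A minimal sketch
# of that idea, assuming a hypothetical load_midi_windows(path, window_size) helper;
# the project's real feature extraction and encoding will differ:
import random

import numpy as np


def windowed_batch_generator(midi_files, load_midi_windows, window_size=20, batch_size=32):
    """Yield (X, y) batches forever so that fit_generator controls the epoch length."""
    while True:
        random.shuffle(midi_files)
        X_buf, y_buf = [], []
        for path in midi_files:
            for window, target in load_midi_windows(path, window_size):
                X_buf.append(window)
                y_buf.append(target)
                if len(X_buf) == batch_size:
                    yield np.array(X_buf), np.array(y_buf)
                    X_buf, y_buf = [], []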
# 1) Load the pretrained network.
try:
    model = load_model(path.join(model_path, model_name))  # custom helper defined in utils
except Exception:
    logfile.write("Model not found! Provide a pre-trained model as input.\n")
    exit(1)

# plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=False)
# SVG(model_to_dot(model).create(prog='dot', format='svg'))

experiment_name = create_experiment_dir(experiment_path, model_name,  # custom helper defined in utils
                                        selected_class, step_size,
                                        approach, susp_num, repeat)

# Fault localization is done per class.
X_val, Y_val = filter_val_set(selected_class, X_test, Y_test)  # custom helper defined in utils

####################
# 2) Test the model and receive the indexes of correct and incorrect classifications.
# Also provide the output of each neuron in each layer for test input x.
filename = experiment_path + '/' + model_name + '_' + str(selected_class)  # C0, C1, ...

plot_model(model,
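# filter_val_set is another helper defined in utils whose body is not shown. Judging from
# the comment "Fault localization is done per class", it presumably keeps only the test
# samples whose label equals selected_class. A minimal sketch under that assumption, for
# one-hot encoded labels of shape (n_samples, n_classes):
import numpy as np


def filter_val_set_sketch(selected_class, X, Y):
    """Return the subset of (X, Y) whose one-hot label corresponds to selected_class."""
    class_indices = np.where(np.argmax(Y, axis=1) == selected_class)[0]
    return X[class_indices], Y[class_indices]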
def main():
    args = parse_args()
    args.verbose = True

    # create the experiment directory and return its name
    experiment_dir = utils.create_experiment_dir(args.experiment_dir, args.verbose)

    with open(os.path.join(experiment_dir, 'arguments.json'), 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    val_split = 0.3  # use 30 percent for validation
    num_tracks = 0

    if args.pickle_file is not None:
        if not os.path.exists(args.pickle_file):
            utils.log(
                'Error: pickle file {} does not exist. Exiting.'.format(args.pickle_file),
                True)
            exit(1)

        with open(args.pickle_file, 'rb') as file:
            tracks = pickle.load(file)

        random.shuffle(tracks)  # individual tracks can be randomized
        num_tracks = len(tracks)
        val_split_index = int(float(num_tracks) * val_split)

        train_generator = utils.get_prepared_data_generator(
            tracks[val_split_index:],
            window_size=args.window_size,
            batch_size=args.batch_size,
            use_instrument=args.use_instrument,
            ignore_empty=args.ignore_empty,
            encode_section=args.encode_section,
            max_tracks_in_ram=args.max_files_in_ram)

        val_generator = utils.get_prepared_data_generator(
            tracks[0:val_split_index],
            window_size=args.window_size,
            batch_size=args.batch_size,
            use_instrument=args.use_instrument,
            ignore_empty=args.ignore_empty,
            encode_section=args.encode_section,
            max_tracks_in_ram=args.max_files_in_ram,
            shuffle_batches=True)
    else:
        try:
            # get paths to midi files in --data_dir
            midi_files = [os.path.join(args.data_dir, path)
                          for path in os.listdir(args.data_dir)
                          if '.mid' in path or '.midi' in path]
        except OSError as e:
            utils.log(
                'Error: Invalid --data_dir, {} directory does not exist. Exiting.'.format(args.data_dir),
                args.verbose)
            exit(1)

        utils.log(
            'Found {} midi files in {}'.format(len(midi_files), args.data_dir),
            args.verbose)

        if len(midi_files) < 1:
            utils.log(
                'Error: no midi files found in {}. Exiting.'.format(args.data_dir),
                args.verbose)
            exit(1)

        num_tracks = len(midi_files)
        val_split_index = int(float(num_tracks) * val_split)

        # use generators to lazy load train/validation data, ensuring that the
        # user doesn't have to load all midi files into RAM at once
        train_generator = utils.get_data_generator(
            midi_files[val_split_index:],
            window_size=args.window_size,
            batch_size=args.batch_size,
            num_threads=args.n_jobs,
            use_instrument=args.use_instrument,
            ignore_empty=args.ignore_empty,
            encode_section=args.encode_section,
            max_files_in_ram=args.max_files_in_ram)

        val_generator = utils.get_data_generator(
            midi_files[0:val_split_index],
            window_size=args.window_size,
            batch_size=args.batch_size,
            num_threads=args.n_jobs,
            use_instrument=args.use_instrument,
            ignore_empty=args.ignore_empty,
            encode_section=args.encode_section,
            max_files_in_ram=args.max_files_in_ram)

    # Load model
    model, epoch = get_model(args)
    if args.verbose:
        print(model.summary())

    utils.save_model(model, experiment_dir)
    utils.log(
        'Saved model to {}'.format(os.path.join(experiment_dir, 'model.json')),
        args.verbose)

    callbacks = get_callbacks(experiment_dir)

    print('fitting model...')

    # heuristic: rough average number of windows per track (cf. magic_number above)
    magic_number = 500
    start_time = time.time()
    model.fit_generator(
        train_generator,
        steps_per_epoch=num_tracks * magic_number / args.batch_size,
        epochs=args.num_epochs,
        validation_data=val_generator,
        validation_steps=num_tracks * .1 * magic_number / args.batch_size,
        verbose=1,
        callbacks=callbacks,
        initial_epoch=epoch)

    utils.log('Finished in {:.2f} seconds'.format(time.time() - start_time),
              args.verbose)
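# utils.log and utils.create_experiment_dir are project helpers used throughout these
# scripts but never shown. A plausible minimal sketch, assuming log is just a
# verbosity-gated print and the experiment directory simply needs to exist (the
# checkpoints/ subfolder is an assumption):
import os


def log(message, verbose):
    """Print the message only when verbose logging is enabled."""
    if verbose:
        print(message)


def create_experiment_dir(experiment_dir, verbose):
    """Create the experiment directory (plus a checkpoints/ subfolder) and return its path."""
    os.makedirs(experiment_dir, exist_ok=True)
    os.makedirs(os.path.join(experiment_dir, 'checkpoints'), exist_ok=True)
    log('Created experiment directory {}'.format(experiment_dir), verbose)
    return experiment_dir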
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# actual training
midi_files = [os.path.join("data1", path) for path in os.listdir("data1")
              if '.mid' in path or '.midi' in path]
print(len(midi_files))

experiment_dir = utils.create_experiment_dir('experiment_dir4', 1)

val_split = 0.2
val_split_index = int(float(len(midi_files)) * val_split)

train_generator = utils.get_data_generator(midi_files[val_split_index:])
val_generator = utils.get_data_generator(midi_files[0:val_split_index])

callbacks = utils.get_callbacks(experiment_dir)

batch_size = 60
start_time = time.time()
num_epochs = 10
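# This fragment sets up the generators, callbacks and hyperparameters but stops before the
# actual call to fit. A hedged completion, borrowing the steps_per_epoch heuristic from the
# other training scripts in this collection (the magic_number value is their estimate of
# average windows per MIDI file, not one measured for "data1"):
magic_number = 827
model.fit_generator(train_generator,
                    steps_per_epoch=int(len(midi_files) * magic_number / batch_size),
                    epochs=num_epochs,
                    validation_data=val_generator,
                    validation_steps=int(len(midi_files) * val_split * magic_number / batch_size),
                    verbose=1,
                    callbacks=callbacks)
print('Finished in {:.2f} seconds'.format(time.time() - start_time))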
def main():
    args = parse_args()
    args.verbose = True

    try:
        # get paths to midi files in --data_dir
        midi_files = [os.path.join(args.data_dir, path)
                      for path in os.listdir(args.data_dir)
                      if '.mid' in path or '.midi' in path]
    except OSError as e:
        utils.log(
            'Error: Invalid --data_dir, {} directory does not exist. Exiting.'.format(args.data_dir),
            args.verbose)
        exit(1)

    utils.log(
        'Found {} midi files in {}'.format(len(midi_files), args.data_dir),
        args.verbose)

    if len(midi_files) < 1:
        utils.log(
            'Error: no midi files found in {}. Exiting.'.format(args.data_dir),
            args.verbose)
        exit(1)

    # create the experiment directory and return its name
    experiment_dir = utils.create_experiment_dir(args.experiment_dir, args.verbose)

    # write --message to experiment_dir
    if args.message:
        with open(os.path.join(experiment_dir, 'message.txt'), 'w') as f:
            f.write(args.message)
            utils.log('Wrote {} bytes to {}'.format(
                len(args.message),
                os.path.join(experiment_dir, 'message.txt')), args.verbose)

    val_split = 0.2  # use 20 percent for validation
    val_split_index = int(float(len(midi_files)) * val_split)

    # use generators to lazy load train/validation data, ensuring that the
    # user doesn't have to load all midi files into RAM at once
    train_generator = utils.get_data_generator(midi_files[val_split_index:],
                                               window_size=args.window_size,
                                               batch_size=args.batch_size,
                                               num_threads=args.n_jobs,
                                               max_files_in_ram=args.max_files_in_ram)

    val_generator = utils.get_data_generator(midi_files[0:val_split_index],
                                             window_size=args.window_size,
                                             batch_size=args.batch_size,
                                             num_threads=args.n_jobs,
                                             max_files_in_ram=args.max_files_in_ram)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # device = 'cpu'
    print(device)

    model, epoch = get_model(args)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)

    if args.verbose:
        print(model)

    utils.save_model(model, experiment_dir)
    utils.log('Saved model to {}'.format(os.path.join(experiment_dir, 'model.pth')),
              args.verbose)

    # callbacks = get_callbacks(experiment_dir)
    criterion = nn.NLLLoss()
    optimizer = get_optimizer(args, model)

    print('fitting model...')
    start_time = time.time()

    train_losses, val_losses = [], []
    for e in range(args.num_epochs):
        print('Epoch', e + 1)
        running_loss = 0
        len_train_generator = 0
        len_val_generator = 0

        for x, y in train_generator:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            log_ps = model(x.float())
            loss = criterion(log_ps, y.long())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            len_train_generator += 1
        else:
            val_loss = 0
            accuracy = 0
            with torch.no_grad():
                model.eval()
                for x, y in val_generator:
                    x, y = x.to(device), y.to(device)
                    log_ps = model(x.float())
                    val_loss += criterion(log_ps, y.long())
                    # convert log probabilities to probabilities
                    ps = torch.exp(log_ps)
                    # select the class with the highest probability
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == y.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor))
                    len_val_generator += 1

            # set the model back to train mode after the eval mode
            model.train()
            train_losses.append(running_loss / len_train_generator)
            val_losses.append(val_loss / len_val_generator)

            print("\nEpoch: {}/{}..".format(e + 1, args.num_epochs),
".format(e+1, epochs), "Training Loss: {:.3f} ".format(running_loss/len_train_generator), "Validation Loss: {:.3f} ".format(val_loss/len_val_generator), "Validation Accuracy: {:.3f}".format(accuracy/len_val_generator)) utils.save_model(model, experiment_dir) # Model Checkpoint # if val_losses[-1] < best_val_loss: # print('Validation loss improved from {:.3f} to {:.3f}, saving the model.'.format(best_val_loss, # val_losses[-1])) # best_val_loss = val_losses[-1] # checkpoint = {'model': model, # 'idx_to_class': idx_to_class} # torch.save(checkpoint, 'checkpoint.pth') utils.log('Finished in {:.2f} seconds'.format(time.time() - start_time), args.verbose)