def use_me_process(sources_list, output_file_names): """The usage process. :param sources_list: The file names to be used. :type sources_list: list[str] :param output_file_names: The output file names to be used. :type output_file_names: list[list[str]] """ print('\n-- Welcome to MaD TwinNet.') if debug: print( '\n-- Cannot proceed in debug mode. Please set debug=False at the settings file.' ) print('-- Exiting.') exit(-1) print( '-- Now I will extract the voice and the background music from the provided files' ) # Masker modules rnn_enc = RNNEnc(hyper_parameters['reduced_dim'], hyper_parameters['context_length'], debug) rnn_dec = RNNDec(hyper_parameters['rnn_enc_output_dim'], debug) fnn = FNNMasker(hyper_parameters['rnn_enc_output_dim'], hyper_parameters['original_input_dim'], hyper_parameters['context_length']) # Denoiser modules denoiser = FNNDenoiser(hyper_parameters['original_input_dim']) rnn_enc.load_state_dict(torch.load(output_states_path['rnn_enc'])) rnn_dec.load_state_dict(torch.load(output_states_path['rnn_dec'])) fnn.load_state_dict(torch.load(output_states_path['fnn'])) denoiser.load_state_dict(torch.load(output_states_path['denoiser'])) if not debug and torch.has_cudnn: rnn_enc = rnn_enc.cuda() rnn_dec = rnn_dec.cuda() fnn = fnn.cuda() denoiser = denoiser.cuda() testing_it = data_feeder_testing( window_size=hyper_parameters['window_size'], fft_size=hyper_parameters['fft_size'], hop_size=hyper_parameters['hop_size'], seq_length=hyper_parameters['seq_length'], context_length=hyper_parameters['context_length'], batch_size=1, debug=debug, sources_list=sources_list) print('-- Let\'s go!\n') total_time = 0 for index, data in enumerate(testing_it()): s_time = time.time() mix, mix_magnitude, mix_phase, voice_true, bg_true = data voice_predicted = np.zeros( (mix_magnitude.shape[0], hyper_parameters['seq_length'] - hyper_parameters['context_length'] * 2, hyper_parameters['window_size']), dtype=np.float32) for batch in range( int(mix_magnitude.shape[0] / training_constants['batch_size'])): b_start = batch * training_constants['batch_size'] b_end = (batch + 1) * training_constants['batch_size'] v_in = Variable( torch.from_numpy(mix_magnitude[b_start:b_end, :, :])) if not debug and torch.has_cudnn: v_in = v_in.cuda() tmp_voice_predicted = rnn_enc(v_in) tmp_voice_predicted = rnn_dec(tmp_voice_predicted) tmp_voice_predicted = fnn(tmp_voice_predicted, v_in) tmp_voice_predicted = denoiser(tmp_voice_predicted) voice_predicted[ b_start:b_end, :, :] = tmp_voice_predicted.data.cpu().numpy() data_process_results_testing( index=index, voice_true=voice_true, bg_true=bg_true, voice_predicted=voice_predicted, window_size=hyper_parameters['window_size'], mix=mix, mix_magnitude=mix_magnitude, mix_phase=mix_phase, hop=hyper_parameters['hop_size'], context_length=hyper_parameters['context_length'], output_file_name=output_file_names[index]) e_time = time.time() print( usage_output_string_per_example.format(f=sources_list[index], t=e_time - s_time)) total_time += e_time - s_time print('\n-- Testing finished\n') print(usage_output_string_total.format(t=total_time)) print('-- That\'s all folks!')
def testing_process(): """The testing process. """ device = 'cuda' if not debug and torch.cuda.is_available() else 'cpu' print('\n-- Starting testing process. Debug mode: {}'.format(debug)) print('-- Process on: {}'.format(device), end='\n\n') print('-- Setting up modules... ', end='') # Masker modules rnn_enc = RNNEnc(hyper_parameters['reduced_dim'], hyper_parameters['context_length'], debug) rnn_dec = RNNDec(hyper_parameters['rnn_enc_output_dim'], debug) fnn = FNNMasker(hyper_parameters['rnn_enc_output_dim'], hyper_parameters['original_input_dim'], hyper_parameters['context_length']) # Denoiser modules denoiser = FNNDenoiser(hyper_parameters['original_input_dim']) rnn_enc.load_state_dict(torch.load( output_states_path['rnn_enc'])).to(device) rnn_dec.load_state_dict(torch.load( output_states_path['rnn_dec'])).to(device) fnn.load_state_dict(torch.load(output_states_path['fnn'])).to(device) denoiser.load_state_dict(torch.load( output_states_path['denoiser'])).to(device) print('done.') testing_it = data_feeder_testing( window_size=hyper_parameters['window_size'], fft_size=hyper_parameters['fft_size'], hop_size=hyper_parameters['hop_size'], seq_length=hyper_parameters['seq_length'], context_length=hyper_parameters['context_length'], batch_size=1, debug=debug) print('-- Testing starts\n') sdr = [] sir = [] total_time = 0 for index, data in enumerate(testing_it()): s_time = time.time() mix, mix_magnitude, mix_phase, voice_true, bg_true = data voice_predicted = np.zeros( (mix_magnitude.shape[0], hyper_parameters['seq_length'] - hyper_parameters['context_length'] * 2, hyper_parameters['window_size']), dtype=np.float32) for batch in range( int(mix_magnitude.shape[0] / training_constants['batch_size'])): b_start = batch * training_constants['batch_size'] b_end = (batch + 1) * training_constants['batch_size'] v_in = torch.from_numpy( mix_magnitude[b_start:b_end, :, :]).to(device) tmp_voice_predicted = rnn_enc(v_in) tmp_voice_predicted = rnn_dec(tmp_voice_predicted) tmp_voice_predicted = fnn(tmp_voice_predicted, v_in) tmp_voice_predicted = denoiser(tmp_voice_predicted) voice_predicted[ b_start:b_end, :, :] = tmp_voice_predicted.data.cpu().numpy() tmp_sdr, tmp_sir = data_process_results_testing( index=index, voice_true=voice_true, bg_true=bg_true, voice_predicted=voice_predicted, window_size=hyper_parameters['window_size'], mix=mix, mix_magnitude=mix_magnitude, mix_phase=mix_phase, hop=hyper_parameters['hop_size'], context_length=hyper_parameters['context_length']) e_time = time.time() print( testing_output_string_per_example.format( e=index, sdr=np.median([i for i in tmp_sdr[0] if not np.isnan(i)]), sir=np.median([i for i in tmp_sir[0] if not np.isnan(i)]), t=e_time - s_time)) total_time += e_time - s_time sdr.append(tmp_sdr) sir.append(tmp_sir) print('\n-- Testing finished\n') print( testing_output_string_all.format( sdr=np.median([ii for i in sdr for ii in i[0] if not np.isnan(ii)]), sir=np.median([ii for i in sir for ii in i[0] if not np.isnan(ii)]), t=total_time)) print('\n-- Saving results... ', end='') with open(metrics_paths['sdr'], 'wb') as f: pickle.dump(sdr, f, protocol=2) with open(metrics_paths['sir'], 'wb') as f: pickle.dump(sir, f, protocol=2) print('done!') print('-- That\'s all folks!')
def testing_process(): """The testing process. """ # Check what device we'll be using device = 'cuda' if not debug and cuda.is_available() else 'cpu' # Inform about the device and time and date printing.print_intro_messages(device) printing.print_msg('Starting training process. ' 'Debug mode: {}'.format(debug)) # Set up MaD TwinNet with printing.InformAboutProcess('Setting up MaD TwinNet'): mad = MaD(rnn_enc_input_dim=hyper_parameters['reduced_dim'], rnn_dec_input_dim=hyper_parameters['rnn_enc_output_dim'], original_input_dim=hyper_parameters['original_input_dim'], context_length=hyper_parameters['context_length']) with printing.InformAboutProcess('Loading states'): mad.load_state_dict(load(output_states_path['mad'])) mad = mad.to(device).eval() with printing.InformAboutProcess('Initializing data feeder'): testing_it = data_feeder.data_feeder_testing( window_size=hyper_parameters['window_size'], fft_size=hyper_parameters['fft_size'], hop_size=hyper_parameters['hop_size'], seq_length=hyper_parameters['seq_length'], context_length=hyper_parameters['context_length'], batch_size=1, debug=debug) p_testing = partial(_testing_process, mad=mad, device=device, seq_length=hyper_parameters['seq_length'], context_length=hyper_parameters['context_length'], window_size=hyper_parameters['window_size'], batch_size=training_constants['batch_size'], hop_size=hyper_parameters['hop_size']) printing.print_msg('Testing starts', end='\n\n') sdr, sir, total_time = [ e for e in zip(*[ i for index, data in enumerate(testing_it()) for i in [p_testing(data, index)] ]) ] total_time = sum(total_time) printing.print_msg('Testing finished', start='\n-- ', end='\n\n') printing.print_msg(testing_output_string_all.format( sdr=np.median([ii for i in sdr for ii in i[0] if not np.isnan(ii)]), sir=np.median([ii for i in sir for ii in i[0] if not np.isnan(ii)]), t=total_time), end='\n\n') with printing.InformAboutProcess('Saving results... '): with open(metrics_paths['sdr'], 'wb') as f: pickle.dump(sdr, f, protocol=2) with open(metrics_paths['sir'], 'wb') as f: pickle.dump(sir, f, protocol=2) printing.print_msg('That\'s all folks!')
def testing_process(): """The testing process. """ # Check what device we'll be using device = 'cuda' if not debug and cuda.is_available() else 'cpu' torch.backends.cudnn.benchmark = True # Inform about the device and time and date printing.print_intro_messages(device) printing.print_msg('Starting training process. ' 'Debug mode: {}'.format(debug)) latent_n =args.layers lats = args.layers do = args.do channels = args.channels print("Using " + str(lats) + " latent layers between encoder and decoder.", flush=True) print("Using " + str(args.channels) + " cnn channels.", flush=True) print("Using " + str(args.do/100) + " dropout.", flush=True) print(("Using residual connections" if args.residual else "Not using residual connections")) outputPath = str(lats)+str(args.channels)+str(args.do)+("res" if args.residual else "")+ _dataset_parent_dir[7:] print("Output path: " + outputPath) pt_path = ("./outputs/states/" + str(lats) + str(args.channels)+str(int(args.do))+("res" if args.residual else "")+".pt"+ _dataset_parent_dir[7:]) pt_path = "./outputs/states/mad.pt" + str(lats) + "latents" + str(args.channels) +"features"+str(args.do/100) + \ "dropout" + ("res" if args.residual else "") + str(90) + "epochs" # print("USING " + str(latent_n) + " conv layers between enc/dec") print("Weights path: " + pt_path, flush=True) # Set up MaD TwinNet with printing.InformAboutProcess('Setting up MaD TwinNet'): mad = MaDConv( cnn_channels=args.channels, inner_kernel_size=1, inner_padding=0, cnn_dropout=do/100, original_input_dim=hyper_parameters['original_input_dim'], context_length=hyper_parameters['context_length'], latent_n=latent_n, residual=args.residual ) with printing.InformAboutProcess('Loading states'): mad.load_state_dict(load(pt_path)) mad = mad.to(device).eval() with printing.InformAboutProcess('Initializing data feeder'): testing_it = data_feeder.data_feeder_testing( window_size=hyper_parameters['window_size'], fft_size=hyper_parameters['fft_size'], hop_size=hyper_parameters['hop_size'], seq_length=hyper_parameters['seq_length'], context_length=hyper_parameters['context_length'], batch_size=1, debug=debug) p_testing = partial( _testing_process, mad=mad, device=device, seq_length=hyper_parameters['seq_length'], context_length=hyper_parameters['context_length'], window_size=hyper_parameters['window_size'], batch_size=4,#training_constants['batch_size'], hop_size=hyper_parameters['hop_size'], outputPath=outputPath) printing.print_msg('Testing starts', end='\n\n') sdr, sir, sar, total_time = [e for e in zip(*[ i for index, data in enumerate(testing_it()) for i in [p_testing(data, index)]])] total_time = sum(total_time) printing.print_msg('Testing finished', start='\n-- ', end='\n\n') printing.print_msg(testing_output_string_all.format( sdr=np.median([ii for i in sdr for ii in i[0] if not np.isnan(ii)]), sir=np.median([ii for i in sir for ii in i[0] if not np.isnan(ii)]), sar=np.median([ii for i in sar for ii in i[0] if not np.isnan(ii)]), t=total_time), end='\n\n') with printing.InformAboutProcess('Saving results... '): with open(metrics_paths['sdr'], 'wb') as f: pickle.dump(sdr, f, protocol=2) with open(metrics_paths['sir'], 'wb') as f: pickle.dump(sir, f, protocol=2) with open(metrics_paths['sar'], 'wb') as f: pickle.dump(sar, f, protocol=2) print("Using " + str(lats) + " latent layers between encoder and decoder.", flush=True) print("Using " + str(args.channels) + " cnn channels.", flush=True) print("Using " + str(args.do / 100) + " dropout.", flush=True) printing.print_msg('That\'s all folks!')
def use_me_process(sources_list, output_file_names): """The usage process. :param sources_list: The file names to be used. :type sources_list: list[pathlib.Path] :param output_file_names: The output file names to be used. :type output_file_names: list[list[str]] """ printing.print_msg('Welcome to MaD TwinNet.', end='\n\n') if debug: printing.print_msg('Cannot proceed in debug mode. ' 'Please set `debug=False` at the settings ' 'file.') printing.print_msg('Exiting.') exit(-1) printing.print_msg('Now I will extract the voice and the ' 'background music from the provided files') device = 'cuda' if not debug and torch.cuda.is_available() else 'cpu' # MaD setting up mad = MaD(rnn_enc_input_dim=hyper_parameters['reduced_dim'], rnn_dec_input_dim=hyper_parameters['rnn_enc_output_dim'], original_input_dim=hyper_parameters['original_input_dim'], context_length=hyper_parameters['context_length']) mad.load_state_dict(torch.load(output_states_path['mad'])) mad = mad.to(device).eval() testing_it = data_feeder.data_feeder_testing( window_size=hyper_parameters['window_size'], fft_size=hyper_parameters['fft_size'], hop_size=hyper_parameters['hop_size'], seq_length=hyper_parameters['seq_length'], context_length=hyper_parameters['context_length'], batch_size=1, debug=debug, sources_list=sources_list) printing.print_msg('Let\'s go!', end='\n\n') total_time = 0 for index, data in enumerate(testing_it()): s_time = time.time() mix, mix_magnitude, mix_phase, voice_true, bg_true = data voice_predicted = np.zeros( (mix_magnitude.shape[0], hyper_parameters['seq_length'] - hyper_parameters['context_length'] * 2, hyper_parameters['window_size']), dtype=np.float32) for batch in range( int(mix_magnitude.shape[0] / training_constants['batch_size'])): b_start = batch * training_constants['batch_size'] b_end = (batch + 1) * training_constants['batch_size'] v_in = torch.from_numpy( mix_magnitude[b_start:b_end, :, :]).to(device) voice_predicted[b_start:b_end, :, :] = mad( v_in).v_j_filt.cpu().numpy() data_feeder.data_process_results_testing( index=index, voice_true=voice_true, bg_true=bg_true, voice_predicted=voice_predicted, window_size=hyper_parameters['window_size'], mix=mix, mix_magnitude=mix_magnitude, mix_phase=mix_phase, hop=hyper_parameters['hop_size'], context_length=hyper_parameters['context_length'], output_file_name=output_file_names[index]) e_time = time.time() printing.print_msg( usage_output_string_per_example.format(f=sources_list[index], t=e_time - s_time)) total_time += e_time - s_time printing.print_msg('MaDTwinNet finished') printing.print_msg(usage_output_string_total.format(t=total_time)) printing.print_msg('That\'s all folks!')