def generate(parser):
    args = parser.parse_args()
    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "interpolation")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))

    # Create evaluation manager
    eval_manager = StyleGEvaluationManager(model, n_gen=100)

    gen_batch = eval_manager.test_single_pitch_latent_interpolation()
    output_path = mkdir_in_path(output_dir, f"one_z_pitch_sweep")
    audio_out = map(postprocess, gen_batch)
    saveAudioBatch(audio_out,
                   path=output_path,
                   basename='test_pitch_sweep',
                   sr=config["transformConfig"]["sample_rate"])
    print("FINISHED!\n")
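# saveAudioBatch is imported from the project's utils and is not defined in this file.
# The sketch below is only an assumption about what it does, based on how it is called
# here and on the manual fallback code further down; the real helper also accepts an
# optional latents argument, which is omitted in this hypothetical stand-in.
import os
import numpy as np
import soundfile as sf

def save_audio_batch_sketch(audio_batch, path, basename, sr):
    """Hypothetical stand-in: write each signal to '<basename>_<i>.wav', skipping existing files."""
    for i, audio in enumerate(audio_batch):
        out_path = os.path.join(path, f'{basename}_{i}.wav')
        if not os.path.exists(out_path):
            sf.write(out_path, np.asarray(audio, dtype=float), sr)
        else:
            print(f"saveAudioBatch: File {out_path} exists. Skipping...")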
def generate(parser):
    args = parser.parse_args()
    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # check if conditioning attribute is present
    if 'attribute_list' in config['loaderConfig'].keys():
        condition_parameter = config['loaderConfig']['attribute_list'][0]
    else:
        print("There is no conditioning parameter ('attribute_list' is empty). Exiting!")
        exit(0)

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "randz_constc")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))

    # Create evaluation manager
    p_val = 0.1111111111111111  # 75
    eval_manager = StyleGEvaluationManager(model, n_gen=20)

    gen_batch, latents = eval_manager.test_single_pitch_random_z(condition_parameter, p_val)
    output_path = mkdir_in_path(output_dir, f"one_z_pitch_sweep_" + str(p_val))
    audio_out = map(postprocess, gen_batch)
    saveAudioBatch(audio_out,
                   path=output_path,
                   basename='test_pitch_sweep',
                   sr=config["transformConfig"]["sample_rate"],
                   latents=latents)
    print("FINISHED!\n")
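# How the constant conditioning value p_val is injected into the latent is handled inside
# StyleGEvaluationManager / the model's buildNoiseData and is not shown here. The function
# below is a purely illustrative, hypothetical sketch (an assumption, not the repository's
# code) of one common scheme: append a single normalized conditioning scalar to a batch of
# random latents.
import torch

def random_z_with_constant_condition_sketch(n_gen, latent_dim, cond_val):
    """Hypothetical helper: (n_gen, latent_dim + 1) latents sharing one condition value."""
    z = torch.randn(n_gen, latent_dim)        # random part, one row per sample
    cond = torch.full((n_gen, 1), cond_val)   # constant conditioning column
    return torch.cat([z, cond], dim=1)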
for item, val in configOverride.items():
    data_config[item] = val

exp_name = config.get("name", "default")
checkPointDir = config["output_path"]
checkPointDir = mkdir_in_path(checkPointDir, exp_name)
# config["output_path"] = checkPointDir

# LOAD CHECKPOINT
print("Search and load last checkpoint")
checkPointData = getLastCheckPoint(checkPointDir, exp_name)
nSamples = kwargs['n_samples']

# CONFIG DATA MANAGER
print("Data manager configuration")
data_manager = AudioPreprocessor(**config['transformConfig'])
data_loader = NSynthLoader(dbname=f"NSynth_{data_manager.transform}",
                           output_path=checkPointDir,
                           preprocessing=data_manager.get_preprocessor(),
                           **config['loaderConfig'])
print(f"Loading data. Found {len(data_loader)} instances")

model_config['output_shape'] = data_manager.get_output_shape()
config["modelConfig"] = model_config

# Save config file
save_config_file(config, os.path.join(checkPointDir, f'{exp_name}_config.json'))

GANTrainer = ProgressiveGANTrainer(modelLabel=exp_name,
def generate(parser):
    args = parser.parse_args()
    argsObj = vars(args)
    print(f"generate args: {argsObj}")

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()
    #### I WANT TO ADD NORMALIZATION HERE ######################
    print(f"postprocess: {postprocess}")

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "2D_spectset")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d_%H.%M'))

    gen_batch, latents = torch.load(argsObj["gen_batch"])

    interp_steps0 = int(argsObj["d0"])
    interp_steps0norm = interp_steps0 - 1  # because the batch generator will spread the steps out to include both endpoints

    interp_steps1 = int(argsObj["d1"])
    interp_steps1norm = interp_steps1 - 1  # because the batch generator will spread the steps out to include both endpoints

    usePM = argsObj["pm"]

    g = list(gen_batch)
    assert interp_steps0 * interp_steps1 == len(g), \
        f"product of d0, d1 interpolation steps({interp_steps0},{interp_steps1}) != batch length ({len(g)})"

    audio_out = map(postprocess, gen_batch)

    if not usePM:  # then just output as usual, including option to write latents if provided
        saveAudioBatch(audio_out,
                       path=output_dir,
                       basename='test_2D4pt',
                       sr=config["transformConfig"]["sample_rate"],
                       latents=latents)

    else:  # save paramManager files (and don't write latents separately)
        data = list(audio_out)  # LW it was a map, make it a list
        zdata = zip(data, latents)  # zip so we can enumerate through pairs of data/latents

        vstep = -1  # gets incremented in loop
        # d1nvar = argsObj["d1nvar"]
        d1nvar = 1  # no variations for this spectset generation

        rowlength = interp_steps0 * d1nvar
        print(f'rowlength is {rowlength}')

        for k, (audio, params) in enumerate(zdata):
            istep = int(k / rowlength)  # the outer counter, orthogonal to the two lines defining the submanifold
            j = k % rowlength
            jstep = int(j / d1nvar)
            vstep = (vstep + 1) % d1nvar
            # print(f'doing row {istep}, col {jstep}, and variation {vstep}')

            if type(audio) != np.ndarray:
                audio = np.array(audio, float)

            path = output_dir
            basename = 'test_spectset2snd'
            sr = config["transformConfig"]["sample_rate"]

            # foo = f'{basename}_{jstep}_{vstep}.wav'
            out_path = os.path.join(path, f'{basename}_d1.{istep}_d0.{jstep}_v.{vstep}.wav')

            # paramManager, create
            pm = paramManager.paramManager(out_path, output_dir)
            ##----------- paramManager interface ------------------##
            # param_out_path = os.path.join(path, f'{basename}_{i}.params')
            pm.initParamFiles(overwrite=True)

            if not os.path.exists(out_path):
                # write_wav(out_path, audio.astype(float), sr)
                sf.write(out_path, audio.astype(float), sr)
                duration = len(audio.astype(float)) / float(sr)
                # print(f"duration is {duration}")

                if latents is not None:
                    # pm.addParam(out_path, "dim1", [0.0, duration], [(p-minpitch)/pitchrange, (p-minpitch)/pitchrange], units="norm, midip in[58,70]", nvals=0, minval='null', maxval='null')
                    pm.addParam(out_path, "dim0", [0.0, duration],
                                [jstep / interp_steps0norm, jstep / interp_steps0norm],
                                units=f'norm, interp steps in[0,{interp_steps0}]',
                                nvals=interp_steps0, minval='null', maxval='null')
                    if interp_steps1norm > 0:  # else just doing 1D interpolation
                        pm.addParam(out_path, "dim1", [0.0, duration],
                                    [istep / interp_steps1norm, istep / interp_steps1norm],
                                    units=f'norm, interp steps in[0,{interp_steps1}]',
                                    nvals=interp_steps1, minval='null', maxval='null')

                    segments = 11  # to include a full segment for each value including endpoints
                    envTimes, envVals = makesteps(np.linspace(0, duration, segments + 1, True),
                                                  np.linspace(0, 1, segments, True))  # need one extra time to flank each value
                    pm.addParam(out_path, "envPt", envTimes, envVals,
                                units=f"norm, duration in[0,{duration}]",
                                nvals=0, minval='null', maxval='null')

                    # write paramfile
                    # torch.save(params, param_out_path)
                    # np.savetxt(txt_param_out_path, params.cpu().numpy())
            else:
                print(f"saveAudioBatch: File {out_path} exists. Skipping...")
                continue

        print(f"GRID data output path/pattern: {out_path}\n")
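# makesteps is defined elsewhere in the repository; the function below is only an
# assumption about its behaviour based on how it is called here (segments + 1 times
# flanking `segments` values), not the project's actual implementation.
import numpy as np

def makesteps_sketch(times, vals):
    """Hypothetical stand-in for makesteps: hold each value constant over its segment
    by emitting it at both ends of that segment, producing a step envelope."""
    env_times, env_vals = [], []
    for i, v in enumerate(vals):
        env_times.extend([times[i], times[i + 1]])
        env_vals.extend([v, v])
    return np.array(env_times), np.array(env_vals)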
def test(parser):
    args = parser.parse_args()
    kwargs = vars(args)
    nsynth_path = kwargs.get('data_path')
    batch_size = kwargs.get('batch_size', 50)
    is_samples = kwargs.get('is_samples', 5000)

    gen_files = list(list_files_abs_path(args.fake_path, 'wav'))
    n_samples = len(gen_files)
    is_samples = min(n_samples, is_samples)

    if args.inception_model is None:
        args.inception_model = DEFAULT_INSTRUMENT_INCEPTION_MODEL

    print(f"Loading inception model: {args.inception_model}")
    device = 'cuda' if GPU_is_available() else 'cpu'
    state_dict = torch.load(args.inception_model, map_location=device)

    output_path = os.path.join(args.dir, "evaluation_metrics")
    checkexists_mkdir(output_path)

    inception_cls = SpectrogramInception3(state_dict['fc.weight'].shape[0], aux_logits=False)
    inception_cls.load_state_dict(state_dict)

    nsynth_prepro = AudioPreprocessor(**DEFAULT_INCEPTION_PREPROCESSING_CONFIG).get_preprocessor()

    inception_score = []
    print("Computing inception score on true data...\nYou can skip this with ctrl+c")
    try:
        pbar = trange(int(n_samples / is_samples), desc="real data IS loop")
        for j in pbar:
            processed_real = list(map(nsynth_prepro, gen_files[j * is_samples:is_samples * (j + 1)]))
            is_maker = InceptionScore(inception_cls)
            is_data = torch.stack(processed_real, dim=0)
            is_data = is_data[:, 0:1]

            for i in range(int(np.ceil(is_samples / batch_size))):
                # mini-batch of (pre-processed) data resized to the Inception input resolution
                is_input = F.interpolate(is_data[i * batch_size:batch_size * (i + 1)], (299, 299))
                is_maker.updateWithMiniBatch(is_input)

            inception_score.append(is_maker.getScore())
            IS_mean = np.mean(inception_score)
            IS_std = np.std(inception_score)
            pbar.set_description("IIS = {0:.4f} +- {1:.4f}".format(IS_mean, IS_std / 2.))

        output_file = f'{output_path}/IIS_{str(n_samples)}_{datetime.now().strftime("%d-%m-%y")}.txt'
        with open(output_file, 'w') as f:
            f.write(str(IS_mean) + '\n')
            f.write(str(IS_std))
    except KeyboardInterrupt as k:
        print("Skipping true data inception score")
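# InceptionScore accumulates classifier outputs across mini-batches; its internals live
# elsewhere in the repository. For reference, here is a self-contained sketch (an
# assumption, not this repo's class) of the usual Inception Score computed from a matrix
# of softmax class probabilities: exp of the mean KL divergence between each conditional
# distribution p(y|x) and the marginal p(y).
import numpy as np

def inception_score_sketch(probs, eps=1e-12):
    """probs: (N, n_classes) softmax outputs. Returns exp(mean KL(p(y|x) || p(y)))."""
    marginal = probs.mean(axis=0, keepdims=True)
    kl = probs * (np.log(probs + eps) - np.log(marginal + eps))
    return float(np.exp(kl.sum(axis=1).mean()))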
def test(parser, visualisation=None):
    args = parser.parse_args()
    kwargs = vars(args)
    nsynth_path = kwargs.get('data_path')
    att = kwargs.get('att_name', 'pitch')
    batch_size = kwargs.get('batch_size', 50)
    is_samples = kwargs.get('is_samples', 5000)

    true_files = list_files_abs_path(args.true_path, 'wav')
    fake_files = list_files_abs_path(args.fake_path, 'wav')
    n_samples = min(len(true_files), len(fake_files))
    is_samples = min(n_samples, is_samples)

    if args.inception_model is None:
        args.inception_model = DEFAULT_INSTRUMENT_INCEPTION_MODEL

    print(f"Loading inception model: {args.inception_model}")
    device = 'cuda' if GPU_is_available() else 'cpu'
    state_dict = torch.load(args.inception_model, map_location=device)

    output_path = args.dir
    output_path = mkdir_in_path(output_path, "evaluation_metrics")
    output_path = mkdir_in_path(output_path, "ikid")

    inception_cls = SpectrogramInception3(state_dict['fc.weight'].shape[0], aux_logits=False)
    inception_cls.load_state_dict(state_dict)

    inception_prepro = AudioPreprocessor(**DEFAULT_INCEPTION_PREPROCESSING_CONFIG).get_preprocessor()

    inception_score = []
    pbar = trange(int(np.ceil(n_samples / is_samples)), desc="Main loop")
    mmd_distance = []
    for j in pbar:
        real_batch = true_files[j * is_samples:is_samples * (j + 1)]
        fake_batch = fake_files[j * is_samples:is_samples * (j + 1)]

        real_logits = []
        fake_logits = []
        for i in trange(int(np.ceil(len(real_batch) / batch_size)), desc='Computing IKID on batch...'):
            real_input = map(inception_prepro, real_batch[i * batch_size:batch_size * (i + 1)])
            real_input = torch.stack(list(real_input), dim=0)
            real_input = real_input[:, 0:1]
            real_input = F.interpolate(real_input, (299, 299))

            fake_input = map(inception_prepro, fake_batch[i * batch_size:batch_size * (i + 1)])
            fake_input = torch.stack(list(fake_input), dim=0)
            fake_input = fake_input[:, 0:1]
            fake_input = F.interpolate(fake_input, (299, 299))

            real_logits.append(inception_cls(real_input).detach())
            fake_logits.append(inception_cls(fake_input).detach())

        real_logits = torch.cat(real_logits, dim=0)
        fake_logits = torch.cat(fake_logits, dim=0)
        mmd_distance.append(mmd(real_logits, fake_logits))
        mean_MMD = np.mean(mmd_distance)
        var_MMD = np.std(mmd_distance)
        pbar.set_description("IKID = {0:.4f} +- {1:.4f}".format(mean_MMD, var_MMD))

    output_file = f'{output_path}/IKID_{datetime.now().strftime("%y_%m_%d")}.txt'
    with open(output_file, 'w') as f:
        f.write(str(mean_MMD) + '\n')
        f.write(str(var_MMD))
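# The mmd helper used above is imported from the evaluation package and is not shown in
# this file. A common formulation for kernel distances on Inception features (as in the
# Kernel Inception Distance) is an unbiased MMD^2 with a degree-3 polynomial kernel; the
# sketch below illustrates that formulation and is an assumption, not necessarily the
# exact function used here.
import torch

def polynomial_mmd2_sketch(x, y):
    """x: (n, d), y: (m, d) feature tensors. Unbiased MMD^2 with k(a, b) = (a.b / d + 1)^3."""
    d = x.shape[1]
    kxx = (x @ x.t() / d + 1) ** 3
    kyy = (y @ y.t() / d + 1) ** 3
    kxy = (x @ y.t() / d + 1) ** 3
    n, m = x.shape[0], y.shape[0]
    # drop diagonal terms for the unbiased within-set estimates
    sum_kxx = (kxx.sum() - kxx.diag().sum()) / (n * (n - 1))
    sum_kyy = (kyy.sum() - kyy.diag().sum()) / (m * (m - 1))
    return float(sum_kxx + sum_kyy - 2 * kxy.mean())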
def generate(parser):
    args = parser.parse_args()
    kwargs = vars(args)
    model, config, model_name = load_model_checkp(**kwargs)
    latentDim = model.config.categoryVectorDim_G
    overlap = kwargs.get('overlap', 0.77)
    batch_size = kwargs.get('batch_size', 50)

    # We load a dummy data loader for post-processing
    model_postpro = AudioPreprocessor(**config['transformConfig']).get_postprocessor()

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "from_midi")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))

    overlap_index = int(config['transformConfig']['audio_length'] * overlap)

    print("Loading MIDI file")
    midi_file = MidiFile(args.midi)
    midi_name = os.path.basename(args.midi).split('.')[0]

    pitch_list = []
    pitch_range = config['loaderConfig']['pitch_range']
    pitch_cls_list = list(range(pitch_range[0], pitch_range[1] + 1))
    for i, track in enumerate(midi_file.tracks):
        for msg in track:
            if msg.type == "note_on":
                if msg.note in pitch_cls_list:
                    pitch_list.append(pitch_cls_list.index(msg.note))
                else:
                    # notes outside the model's pitch range are transposed by an octave if possible
                    if msg.note > max(pitch_cls_list):
                        if msg.note - 12 in pitch_cls_list:
                            pitch_list.append(pitch_cls_list.index(msg.note - 12))
                    if msg.note < min(pitch_cls_list):
                        if msg.note + 12 in pitch_cls_list:
                            pitch_list.append(pitch_cls_list.index(msg.note + 12))

    output_audio = np.array([])
    pbar = trange(int(np.ceil(len(pitch_list) / batch_size)), desc="MIDI generation loop")

    input_z, _ = model.buildNoiseData(batch_size, None, skipAtts=True)
    z = input_z[:, :-len(pitch_cls_list)].clone()
    z = interpolate_batch(z[0], z[1], steps=batch_size)
    n_interp = z.size(0)
    alpha = 0
    k = 0
    for j in pbar:
        input_labels = torch.LongTensor(pitch_list[j * batch_size:batch_size * (j + 1)])
        input_z, _ = model.buildNoiseData(len(input_labels),
                                          inputLabels=input_labels.reshape(-1, 1),
                                          skipAtts=True)
        z_target = input_z[0, :-len(pitch_cls_list)].clone()
        input_z[:, :-len(pitch_cls_list)] = z.clone()[:len(input_labels)]

        gen_batch = model.test(input_z, getAvG=True)
        gen_raw = map(model_postpro, gen_batch)
        gen_raw = map(lambda x: np.array(x).astype(float), gen_raw)

        z = interpolate_batch(z[-1], z_target, batch_size)

        for i, g in enumerate(gen_raw):
            if i == 0 and j == 0:
                output_audio = g
            else:
                # overlap-add: extend the output and mix the new note over the overlap region
                output_audio = np.concatenate([output_audio, np.zeros(len(g) - overlap_index)])
                output_audio[-len(g):] += g

    # output_audio /= max(output_audio)
    # output_audio[output_audio > 1] = 1
    output_audio /= max(output_audio)
    write_wav(f'{output_dir}/{midi_name}_{datetime.today().strftime("%Y_%m_%d_%H")}.wav',
              output_audio, 16000)
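# interpolate_batch is used above to glide between latent endpoints from one batch to the
# next; its implementation lives elsewhere in the repository. The function below is a
# plausible linear sketch (an assumption, not the project's code).
import torch

def interpolate_batch_sketch(z_start, z_end, steps):
    """Hypothetical helper: (steps, latent_dim) linear interpolation from z_start to z_end,
    endpoints included."""
    weights = torch.linspace(0.0, 1.0, steps).unsqueeze(1)
    return z_start.unsqueeze(0) * (1 - weights) + z_end.unsqueeze(0) * weights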
def generate(parser):
    args = parser.parse_args()
    argsObj = vars(args)
    print(f"generate args: {argsObj}")

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()
    #### I WANT TO ADD NORMALIZATION HERE ######################
    print(f"postprocess: {postprocess}")

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "2D")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))

    # Create evaluation manager
    eval_manager = StyleGEvaluationManager(model, n_gen=2)

    z0 = torch.load(argsObj["z0"])
    z1 = torch.load(argsObj["z1"])

    minpitch = int(argsObj["p0"])
    maxpitch = int(argsObj["p1"])
    pitchrange = maxpitch - minpitch
    if pitchrange < 1:
        pitchrange = 1

    interp_steps1 = int(argsObj["d1"])
    interp_steps1norm = interp_steps1 - 1  # because the batch generator will spread the steps out to include both endpoints

    usePM = argsObj["pm"]
    print(f"interp_steps1 is {interp_steps1}, and usePM (use ParamManager) is {usePM}")

    for p in range(minpitch, maxpitch + 1):
        ####### ---- with conditioned pitch
        # linear
        #gen_batch, latents = eval_manager.test_single_pitch_latent_interpolation(p_val=p, z0=z0, z1=z1, steps=10)
        # spherical
        #gen_batch, latents = eval_manager.qslerp(pitch=p, z0=z0, z1=z1, steps=10)
        # staggered
        gen_batch, latents = eval_manager.test_single_pitch_latent_staggered_interpolation(
            p_val=p,
            z0=z0,
            z1=z1,
            steps=interp_steps1,
            d1nvar=argsObj["d1nvar"],
            d1var=argsObj["d1var"])

        audio_out = map(postprocess, gen_batch)

        if not usePM:  # then just output as usual, including option to write latents if provided
            saveAudioBatch(audio_out,
                           path=output_dir,
                           basename='test_pitch_sweep' + "_" + str(p),
                           sr=config["transformConfig"]["sample_rate"],
                           latents=latents)

        else:  # save paramManager files (and don't write latents separately)
            data = list(audio_out)  # LW it was a map, make it a list
            zdata = zip(data, latents)  # zip so we can enumerate through pairs of data/latents

            istep = 0
            vstep = 0
            for i, (audio, params) in enumerate(zdata):
                istep = int(i / argsObj["d1nvar"])
                vstep = (vstep + 1) % argsObj["d1nvar"]

                if type(audio) != np.ndarray:
                    audio = np.array(audio, float)

                path = output_dir
                basename = 'test_pitch_sweep' + "_" + str(p)
                sr = config["transformConfig"]["sample_rate"]

                # foo = f'{basename}_{istep}_{vstep}.wav'
                out_path = os.path.join(path, f'{basename}_{istep}_{vstep}.wav')

                # paramManager, create
                pm = paramManager.paramManager(out_path, output_dir)
                ##----------- paramManager interface ------------------##
                # param_out_path = os.path.join(path, f'{basename}_{i}.params')
                pm.initParamFiles(overwrite=True)

                if not os.path.exists(out_path):
                    # write_wav(out_path, audio.astype(float), sr)
                    sf.write(out_path, audio.astype(float), sr)
                    duration = len(audio.astype(float)) / float(sr)
                    # print(f"duration is {duration}")

                    if latents is not None:
                        pm.addParam(out_path, "pitch", [0.0, duration],
                                    [(p - minpitch) / pitchrange, (p - minpitch) / pitchrange],
                                    units="norm, midip in[58,70]",
                                    nvals=0, minval='null', maxval='null')
                        pm.addParam(out_path, "instID", [0.0, duration],
                                    [istep / interp_steps1norm, istep / interp_steps1norm],
                                    units="norm, interp steps in[0,10]",
                                    nvals=10, minval='null', maxval='null')
                        # pm.addParam(out_path, "envPt", [0.0, duration], [0, 1.0], units=f"norm, duration in[0,{duration}]", nvals=0, minval='null', maxval='null')

                        segments = 11  # to include a full segment for each value including endpoints
                        envTimes, envVals = makesteps(np.linspace(0, duration, segments + 1, True),
                                                      np.linspace(0, 1, segments, True))  # need one extra time to flank each value
                        pm.addParam(out_path, "envPt", envTimes, envVals,
                                    units=f"norm, duration in[0,{duration}]",
                                    nvals=0, minval='null', maxval='null')

                        # write paramfile
                        # torch.save(params, param_out_path)
                        # np.savetxt(txt_param_out_path, params.cpu().numpy())
                else:
                    print(f"saveAudioBatch: File {out_path} exists. Skipping...")
                    continue

    print("FINISHED!\n")
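# The commented-out qslerp call in the function above interpolates on the sphere rather
# than along a straight line in latent space. For reference only, here is a generic
# spherical-interpolation sketch (an assumption, not the evaluation manager's qslerp;
# it assumes z0 and z1 are 1-D and not (anti-)parallel).
import torch

def slerp_sketch(z0, z1, steps):
    """Hypothetical helper: (steps, latent_dim) spherical path from z0 to z1."""
    omega = torch.acos(torch.clamp(torch.dot(z0 / z0.norm(), z1 / z1.norm()), -1.0, 1.0))
    ts = torch.linspace(0.0, 1.0, steps)
    return torch.stack(
        [(torch.sin((1 - t) * omega) * z0 + torch.sin(t * omega) * z1) / torch.sin(omega)
         for t in ts], dim=0)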
def train_inception_model(output_file,
                          att_cls="pitch",
                          dbsize=100000,
                          labels=["mallet"],
                          batch_size=50):
    import numpy as np

    # path_out = mkdir_in_path(".", "inception_data")
    # path_out = "/ldaphome/jnistal/sandbox"
    path_out = "/home/javier/developer/inception_test"
    path_to_raw = "/home/javier/developer/datasets/nsynth-train/audio"
    att_dict_path = "/home/javier/developer/datasets/nsynth-train/examples.json"
    # path_to_raw = "/ldaphome/jnistal/data/nsynth-train/audio/"

    data_manager = AudioPreprocessor(data_path=path_to_raw,
                                     output_path=path_out,
                                     dbname='nsynth',
                                     sample_rate=16000,
                                     audio_len=16000,
                                     data_type='audio',
                                     transform='specgrams',
                                     db_size=dbsize,
                                     labels=labels,
                                     transformConfig=dict(n_frames=64,
                                                          n_bins=128,
                                                          fade_out=True,
                                                          fft_size=1024,
                                                          win_size=1024,
                                                          hop_size=256,
                                                          n_mel=256),
                                     load_metadata=True,
                                     loaderConfig=dict(size=dbsize,
                                                       instrument_labels=labels,
                                                       pitch_range=[44, 70],
                                                       filter_keys=['acoustic'],
                                                       attribute_list=[att_cls],
                                                       att_dict_path=att_dict_path))
    data_loader = data_manager.get_loader()
    val_data, val_labels = data_loader.train_val_split()
    val_data = val_data[:, 0:1]

    att_index = data_loader.getKeyOrders()[att_cls]['order']
    att_classes = data_loader.att_classes[att_index]
    num_classes = len(att_classes)

    data_loader = DataLoader(data_loader,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=2)
    device = "cuda" if GPU_is_available() else "cpu"
    sm = nn.Softmax(dim=1)

    inception_model = nn.DataParallel(SpectrogramInception3(num_classes, aux_logits=False))
    inception_model.to(device)

    optim = torch.optim.Adam(filter(lambda p: p.requires_grad, inception_model.parameters()),
                             betas=[0, 0.99],
                             lr=0.001)
    # optim = torch.optim.RMSprop(filter(lambda p: p.requires_grad, inception_model.parameters()),
    #                             alpha=1.0, lr=0.045, weight_decay=0.9)
    # criterion = nn.BCEWithLogitsLoss()
    criterion = nn.CrossEntropyLoss()

    epoch_bar = trange(5000, desc='train-loop')
    for i in epoch_bar:
        data_iter = iter(data_loader)
        iter_bar = trange(len(data_iter), desc='epoch-loop')
        inception_model.train()
        for j in iter_bar:
            data = next(data_iter)
            inputs_real = data[0]
            inputs_real.requires_grad = True
            target = data[1][:, att_index]

            # take magnitude cqt
            mag_input = F.interpolate(inputs_real[:, 0:1], (299, 299))
            # mag_input = inputs_real
            optim.zero_grad()
            output = inception_model(mag_input.to(device))
            loss = criterion(output, target.to(device))
            loss.backward()

            state_msg = f'Iter: {j}; loss: {loss:0.2f} '
            iter_bar.set_description(state_msg)
            optim.step()

        # SAVE CHECK-POINT
        if i % 10 == 0:
            if isinstance(inception_model, torch.nn.DataParallel):
                torch.save(inception_model.module.state_dict(), output_file)
            else:
                torch.save(inception_model.state_dict(), output_file)

        # EVALUATION
        with torch.no_grad():
            inception_model.eval()
            val_i = int(np.ceil(len(val_data) / batch_size))
            val_loss = 0
            prec = 0
            y_pred = []
            y_true = []
            for k in range(val_i):
                vlabels = val_labels[k * batch_size:batch_size * (k + 1)][:, att_index]
                val_output = inception_model(
                    F.interpolate(val_data[k * batch_size:batch_size * (k + 1)], (299, 299)))
                val_loss += criterion(val_output, vlabels.long()).item()

                val_p = sm(val_output).detach().to(device)
                val_out = list(map(lambda x: x.argmax(), val_p))
                y_pred += val_out
                y_true += list(vlabels)
                # val_str = midi2str([v.item() for v in val_out])
                # val_freq = midi2freq([v.item() for v in val_out])
                # confusion_matrix(val_output, )
                prec += (torch.stack(val_out) == vlabels.long()).sum() * 100 / len(vlabels)

            cm = confusion_matrix([att_classes[i.int()] for i in y_pred],
                                  [att_classes[i.int()] for i in y_true],
                                  labels=att_classes)
            print(cm)
            print(classification_report(y_true,
                                        y_pred,
                                        labels=np.arange(num_classes),
                                        target_names=att_classes))
            state_msg2 = f'm_precision: {prec / val_i: 0.2f} %; epoch {i}; m_val_loss: {val_loss / val_i: 0.2f}'
            epoch_bar.set_description(state_msg2)
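# Example invocation, matching the signature above. This is illustrative only: the
# checkpoint file name is hypothetical, and the data paths hard-coded inside
# train_inception_model would need to point at a local NSynth copy for it to run.
if __name__ == '__main__':
    train_inception_model('inception_pitch_classifier.pt',
                          att_cls='pitch',
                          dbsize=100000,
                          labels=['mallet'],
                          batch_size=50)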
def generate(parser):
    args = parser.parse_args()
    argsObj = vars(args)
    print(f"generate args: {argsObj}")

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()
    #### I WANT TO ADD NORMALIZATION HERE ######################
    print(f"postprocess: {postprocess}")

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "2D")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))

    # Create evaluation manager
    eval_manager = StyleGEvaluationManager(model, n_gen=2)

    z0 = torch.load(argsObj["z0"])
    z1 = torch.load(argsObj["z1"])
    if argsObj["z2"] is None:
        z2 = 0
    else:
        z2 = torch.load(argsObj["z2"])
    if argsObj["z3"] is None:
        z3 = 0
    else:
        z3 = torch.load(argsObj["z3"])

    interp_steps0 = int(argsObj["d0"])
    interp_steps0norm = interp_steps0 - 1  # because the batch generator will spread the steps out to include both endpoints

    interp_steps1 = int(argsObj["d1"])
    interp_steps1norm = interp_steps1 - 1  # because the batch generator will spread the steps out to include both endpoints

    usePM = argsObj["pm"]
    print(f"interp_steps0 is {interp_steps0}, interp_steps1 is {interp_steps1}, and usePM (use ParamManager) is {usePM}")

    ####### ---- unconditioned
    gen_batch, latents = eval_manager.unconditioned_linear_interpolation(
        line0z0=z0,
        line0z1=z1,
        line1z0=z2,
        line1z1=z3,
        d0steps=interp_steps0,
        d1steps=interp_steps1,
        d1nvar=argsObj["d1nvar"],
        d1var=argsObj["d1var"])

    g = list(gen_batch)
    # for k in length

    audio_out = map(postprocess, gen_batch)

    # save the .pt files no matter what since we may want to use them to zoom in, resample, or whatever.
    if not usePM:  # then just output as usual, including option to write latents if provided
        saveAudioBatch(audio_out,
                       path=output_dir,
                       basename='test_2D4pt',
                       sr=config["transformConfig"]["sample_rate"],
                       latents=latents)

    else:  # save paramManager files (and don't write latents separately)
        data = list(audio_out)  # LW it was a map, make it a list
        zdata = zip(data, latents)  # zip so we can enumerate through pairs of data/latents

        vstep = -1  # gets incremented in loop
        rowlength = interp_steps0 * argsObj["d1nvar"]
        print(f'rowlength is {rowlength}')

        for k, (audio, params) in enumerate(zdata):
            istep = int(k / rowlength)  # the outer counter, orthogonal to the two lines defining the submanifold
            j = k % rowlength
            jstep = int(j / argsObj["d1nvar"])
            vstep = (vstep + 1) % argsObj["d1nvar"]
            print(f'doing row {istep}, col {jstep}, and variation {vstep}')

            if type(audio) != np.ndarray:
                audio = np.array(audio, float)

            path = output_dir
            basename = 'test_2D4pt'
            sr = config["transformConfig"]["sample_rate"]

            # foo = f'{basename}_{jstep}_{vstep}.wav'
            out_path = os.path.join(path, f'{basename}_d1.{istep}_d0.{jstep}_v.{vstep}.wav')

            # paramManager, create
            pm = paramManager.paramManager(out_path, output_dir)
            ##----------- paramManager interface ------------------##
            # param_out_path = os.path.join(path, f'{basename}_{i}.params')
            pm.initParamFiles(overwrite=True)

            # also save pt files in case we want to use them to create other grids, scales, etc.
            pt_param_out_path = os.path.join(path, f'{basename}_d1.{istep}_d0.{jstep}_v.{vstep}.pt')
            torch.save(params, pt_param_out_path)

            if not os.path.exists(out_path):
                # write_wav(out_path, audio.astype(float), sr)
                sf.write(out_path, audio.astype(float), sr)
                duration = len(audio.astype(float)) / float(sr)
                # print(f"duration is {duration}")

                if latents is not None:
                    # pm.addParam(out_path, "dim1", [0.0, duration], [(p-minpitch)/pitchrange, (p-minpitch)/pitchrange], units="norm, midip in[58,70]", nvals=0, minval='null', maxval='null')
                    pm.addParam(out_path, "dim0", [0.0, duration],
                                [jstep / interp_steps0norm, jstep / interp_steps0norm],
                                units=f'norm, interp steps in[0,{interp_steps0}]',
                                nvals=interp_steps0, minval='null', maxval='null')
                    if interp_steps1norm > 0:  # else just doing 1D interpolation
                        pm.addParam(out_path, "dim1", [0.0, duration],
                                    [istep / interp_steps1norm, istep / interp_steps1norm],
                                    units=f'norm, interp steps in[0,{interp_steps1}]',
                                    nvals=interp_steps1, minval='null', maxval='null')

                    segments = 11  # to include a full segment for each value including endpoints
                    envTimes, envVals = makesteps(np.linspace(0, duration, segments + 1, True),
                                                  np.linspace(0, 1, segments, True))  # need one extra time to flank each value
                    pm.addParam(out_path, "envPt", envTimes, envVals,
                                units=f"norm, duration in[0,{duration}]",
                                nvals=0, minval='null', maxval='null')

                    # write paramfile
                    # torch.save(params, param_out_path)
                    # np.savetxt(txt_param_out_path, params.cpu().numpy())
            else:
                print(f"saveAudioBatch: File {out_path} exists. Skipping...")
                continue