Code example #1
def generate(parser):
    args = parser.parse_args()

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "interpolation")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))
    
    # Create evaluation manager
    eval_manager = StyleGEvaluationManager(model, n_gen=100)

    gen_batch = eval_manager.test_single_pitch_latent_interpolation()
    output_path = mkdir_in_path(output_dir, f"one_z_pitch_sweep")
    audio_out = map(postprocess, gen_batch)
    saveAudioBatch(audio_out,
                   path=output_path,
                   basename='test_pitch_sweep', 
                   sr=config["transformConfig"]["sample_rate"])
    print("FINISHED!\n")
Code example #2
def generate(parser):
    parser.add_argument("--val", dest="val", action='store_true')
    parser.add_argument("-c", dest="config", type=str)
    args = parser.parse_args()

    config = read_json(args.config)
    # We load a dummy data loader for post-processing
    transform_config = config['transform_config']
    loader_config = config['loader_config']
    processor = AudioProcessor(**transform_config)
    postprocess = processor.get_postprocessor()
    assert os.path.exists(args.outdir), "Output path does not exist"
    # Create output evaluation dir
    trval = 'val' if args.val else 'train'
    output_dir = mkdir_in_path(args.outdir, f"true_sample_{config['name']}")
    output_dir = mkdir_in_path(
        output_dir,
        f"{trval}_{args.n_gen}_{datetime.now().strftime('%Y-%m-%d_%H_%M')}")

    dbname = loader_config['dbname']
    loader = get_data_loader(dbname)(name=dbname + '_' +
                                     transform_config['transform'],
                                     preprocessing=processor,
                                     **loader_config)

    if args.val:
        data, _ = loader.get_validation_set(args.n_gen)
    else:
        data = random.sample(loader.data, k=args.n_gen)
    audio_out = map(postprocess, data)
    saveAudioBatch(audio_out,
                   path=output_dir,
                   basename='true_sample',
                   sr=config["transform_config"]["sample_rate"])
    print("FINISHED!\n")
Code example #3
def test(parser, visualisation=None):
    args = parser.parse_args()
    if GPU_is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    true_files = list_files_abs_path(args.true_path, 'wav')
    fake_files = list_files_abs_path(args.fake_path, 'wav')

    output_path = args.dir
    output_path = mkdir_in_path(output_path, "evaluation_metrics")
    output_path = mkdir_in_path(output_path, "fad")
    real_paths_csv = f"{output_path}/real_audio.csv"
    with open(real_paths_csv, "w") as f:
        for file_path in true_files:
            f.write(file_path + '\n')
    fake_paths_csv = f"{output_path}/fake_audio.csv"
    with open(fake_paths_csv, "w") as f:
        for file_path in fake_files:
            f.write(file_path + '\n')

    fad = float(
        subprocess.check_output([
            "sh", "shell_scripts/fad.sh", "--real=" + real_paths_csv,
            "--fake=" + fake_paths_csv, "--output=" + output_path
        ]).decode()[-10:-1])
    with open(
            f"{output_path}/fad_{len(true_files)}_{datetime.now().strftime('%y_%m_%d')}.txt",
            "w") as f:
        f.write(str(fad))

    print("FAD={0:.4f}".format(fad))
Code example #4
File: randz_constc.py  Project: lonce/sonyGanFork
def generate(parser):
    args = parser.parse_args()

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # check if conditioning attribute is present
    if 'attribute_list' in config['loaderConfig'].keys():
        condition_parameter = config['loaderConfig']['attribute_list'][0]
    else:
        print("There is no conditioning parameter ('attribute_list' is empty). Exiting!")
        exit(0)
    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "randz_constc")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))
    
    # Create evaluation manager
    p_val = 0.1111111111111111 #75
    eval_manager = StyleGEvaluationManager(model, n_gen=20)
    gen_batch, latents = eval_manager.test_single_pitch_random_z(condition_parameter, p_val)
    output_path = mkdir_in_path(output_dir, f"one_z_pitch_sweep_{p_val}")
    audio_out = map(postprocess, gen_batch)

    saveAudioBatch(audio_out,
                   path=output_path,
                   basename='test_pitch_sweep', 
                   sr=config["transformConfig"]["sample_rate"],
                   latents=latents)
    print("FINISHED!\n")
Code example #5
    def run_tests_evaluation_and_visualization(self, scale):
        scale_output_dir = mkdir_in_path(self.output_dir, f'scale_{scale}')
        iter_output_dir  = mkdir_in_path(scale_output_dir, f'iter_{self.iter}')
        from utils.utils import saveAudioBatch

        D_true, true_emb, \
        D_fake, fake_emb, \
        D_fake_avg, fake_avg_emb, \
        true, fake, fake_avg = self.test_GAN()

        if self.modelConfig.ac_gan:
            output_dir = mkdir_in_path(iter_output_dir, 'classification_report')
            if not hasattr(self, 'cls_vis'):
                from visualization.visualization import AttClassifVisualizer
                self.cls_vis = AttClassifVisualizer(
                    output_path=output_dir,
                    env=self.modelLabel,
                    save_figs=True,
                    attributes=self.loader.header['attributes'].keys(),
                    att_val_dict=self.loader.header['attributes'])
            self.cls_vis.output_path = output_dir

            self.cls_vis.publish(
                self.ref_labels_str, 
                D_true,
                name=f'{scale}_true',
                title=f'scale {scale} True data')
            
            self.cls_vis.publish(
                self.ref_labels_str, 
                D_fake,
                name=f'{scale}_fake',
                title=f'scale {scale} Fake data')

        if self.save_gen:
            output_dir = mkdir_in_path(iter_output_dir, 'generation')
            saveAudioBatch(
                self.loader.postprocess(fake), 
                path=output_dir, 
                basename=f'gen_audio_scale_{scale}')

        if self.vis_manager is not None:
            output_dir = mkdir_in_path(iter_output_dir, 'audio_plots')
            if scale >= self.n_scales - 2:
                self.vis_manager.renderAudio = True

            self.vis_manager.set_postprocessing(
                self.loader.get_postprocessor())
            self.vis_manager.publish(
                true[:5], 
                # labels=D_true[:][:5],
                name=f'real_scale_{scale}', 
                output_dir=output_dir)
            self.vis_manager.publish(
                fake[:5], 
                # labels=D_fake[0][:5],
                name=f'gen_scale_{scale}', 
                output_dir=output_dir)
Code example #6
    def __init__(
            self,
            data_path,
            output_path,
            size,
            _format,
            transform=None,
            # train_val_split=None,
            dbname="default",
            load_metadata=True,
            overwrite=False,
            preprocessing=None,
            preprocess=True,
            balanced_data=True,
            shuffle=False,
            **kargs):
        data.Dataset.__init__(self)

        self.data = []
        self.metadata = []

        self.output_path = \
            mkdir_in_path(os.path.expanduser(output_path), dbname)
        self.data_path = data_path
        self.size = size
        self.format = _format

        self.transform = transform
        # self.train_val_split = train_val_split
        self.load_metadata = load_metadata
        self.overwrite = overwrite
        self.preprocessing = preprocessing
        self._preprocess = preprocess
        self.balanced_data = balanced_data
        self.shuffle = shuffle

        assert os.path.exists(self.data_path), \
            f"DataLoader error: path {self.data_path} doesn't exist"
        assert self.format in FORMATS, \
            f"DataLoader error: format {self.format} not in {FORMATS}"

        self.pt_file_path = os.path.join(self.output_path, f'{dbname}.pt')
        # Reload dataset if exists else load and preprocess
        if os.path.exists(self.pt_file_path) and not self.overwrite:
            print(f"Dataset {self.pt_file_path} exists. Reloading...")
            # self.data, self.metadata = torch.load(self.pt_file_path)
            new_obj = torch.load(self.pt_file_path)
            self.__dict__.update(new_obj.__dict__)
        else:
            print(f"Creating dataset in {self.pt_file_path}")
            # Load data
            self.load_data()
            # Shuffle data
            if self.shuffle: self.shuffle_data()
            # Preprocessing:
            if self.preprocessing and self._preprocess:
                self.preprocess()
            #torch.save(self, self.pt_file_path, pickle_module=dill)
            print(f"Dataset saved.")
        print("Dataset loaded!")
Code example #7
File: visualization.py  Project: wgwangang/DrumGAN
    def publish_AC_activations(self,
                               data,
                               axes,
                               name="",
                               trace_names=None,
                               output_dir=None):
        output_dir = mkdir_in_path(output_dir, "AC_activations")
        fig = Figure()
        win = name
        data = data.numpy()
        self.update_tokens(win)
        if trace_names is None:
            trace_names = [f'trace {i}' for i in range(len(data))]

        for i, act in enumerate(data):
            fig.add_trace(
                go.Scatterpolar(name=str(trace_names[i]),
                                theta=axes,
                                mode='lines',
                                r=act,
                                fill='toself'))

        fig.update_layout(
            title=name,
            polar=dict(radialaxis=dict(visible=True, range=[0, 1])))

        self.window_tokens[win] = \
            vis.plotlyplot(fig, env=self.env + '_AC', win=self.window_tokens[win])
        if output_dir:
            plot(fig,
                 filename=join(output_dir, name + '.html'),
                 auto_open=False)
Code example #8
def generate(parser):
    args = parser.parse_args()
    device = get_device()

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.noiseVectorDim
    transform_config = config['transform_config']
    loader_config = config['loader_config']
    # We load a dummy data loader for post-processing
    processor = AudioProcessor(**transform_config)

    dbname = loader_config['dbname']
    loader_config["criteria"]["size"] = 1000
    loader = get_data_loader(dbname)(
        name=dbname + '_' + transform_config['transform'],
        preprocessing=processor, **loader_config)


    label = torch.Tensor(random.sample(loader.metadata, k=1))

    labels, _ = model.buildNoiseData(1, inputLabels=label, skipAtts=True)
    z = labels.repeat(args.n_gen, 1)

    z_noise = radial_interpolation(latentDim, args.n_gen)

    z[:, :latentDim] = z_noise

    gnet = model.getOriginalG()
    gnet.eval()
    with torch.no_grad():
        out = gnet(z.to(device)).detach().cpu()

        audio_out = loader.postprocess(out)

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "radial_interpolation")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))

    saveAudioBatch(audio_out,
                   path=output_dir,
                   basename='test_radial_interpolation', 
                   sr=config["transform_config"]["sample_rate"])
    print("FINISHED!\n")
Code example #9
    def __init__(self,
                 data_path,
                 criteria,
                 name,
                 output_path=None,
                 getitem_processing=None,
                 overwrite=False,
                 preprocessing=None,
                 postprocessing=None,
                 preprocess=True,
                 shuffle=False,
                 **kwargs):
        data.Dataset.__init__(self)

        # input args
        self.data_path = data_path

        self.criteria = criteria
        self.getitem_processing = getitem_processing
        self.preprocessing = preprocessing
        self.preprocess = preprocess
        self.shuffle = shuffle
        self.postprocessing = postprocessing

        # data/metadata/header attributes
        self.load_data()
        self.dbname = f'{name}_{self.__hash__()}'
        output_path = mkdir_in_path(self.data_path, 'processed')
        self.output_path = mkdir_in_path(os.path.expanduser(output_path), name)

        assert os.path.exists(self.data_path), \
            f"DataLoader error: path {self.data_path} doesn't exist"
        # assert self.format in FORMATS, \
        #     f"DataLoader error: format {self.format} not in {FORMATS}"
        self.pt_file_path = os.path.join(self.output_path, f'{self.dbname}.pt')
        # Reload dataset if exists else load and preprocess
        if os.path.exists(self.pt_file_path):
            print(f"Dataset {self.pt_file_path} exists. Reloading...")
            self.load_from_pt_file(self.pt_file_path)
        else:
            import joblib
            print(f"Saving dataset in {self.pt_file_path}")
            self.init_dataset()
            joblib.dump(self, self.pt_file_path)
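
Note: the constructor above caches the fully initialised dataset object with joblib.dump and, on later runs, restores it through load_from_pt_file, which is not shown. A minimal sketch of that counterpart, assuming it simply wraps joblib.load and adopts the cached attributes (mirroring the self.__dict__.update pattern of example #6):

import joblib

def load_from_pt_file(self, path):
    # Assumed counterpart of the joblib.dump call above: reload the cached
    # dataset object and copy its attributes (data, metadata, ...) onto self.
    cached = joblib.load(path)
    self.__dict__.update(cached.__dict__)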
Code example #10
File: visualization.py  Project: wgwangang/DrumGAN
    def publish_mmd(self, true_data, fake_data, name, _iter, output_dir=None):
        output_dir = mkdir_in_path(output_dir, "mmd")
        self.update_tokens(name)
        if not hasattr(self, 'score'):
            self.score = {}
        if name not in self.score:
            self.score[name] = {'x': [], 'y': []}
        self.score[name]['x'].append(_iter + 1)

        mmd_distance = mmd(true_data, fake_data)
        self.score[name]['y'].append(mmd_distance)
        opts = {'title': name, 'xlabel': 'iteration', 'ylabel': 'mmd distance'}

        self.window_tokens[name] = vis.line(X=self.score[name]['x'],
                                            Y=self.score[name]['y'],
                                            opts=opts,
                                            win=self.window_tokens[name],
                                            env=self.env + '_mmd')
        if output_dir:
            self.save(iter_n=self.score[name]['x'],
                      loss_val=self.score[name]['y'],
                      title=name,
                      filename=os.path.join(output_dir, f'{name}.html'))
Code example #11
File: visualization.py  Project: wgwangang/DrumGAN
    def __init__(self, output_path, **kargs):
        output_path = mkdir_in_path(output_path, "loss_plots")
        TensorVisualizer.__init__(self, output_path=output_path, **kargs)
Code example #12
    argparser.add_argument('--dbsize',  # flag name assumed from the dbsize=args.dbsize call below
                           dest='dbsize',
                           type=int,
                           default=10000,
                           help="Name of the output inception model")

    argparser.add_argument('--batch-size',
                           dest='batch_size',
                           type=int,
                           default=10000,
                           help="Name of the output inception model")
    argparser.add_argument('-l',
                           '--labels',
                           dest='labels',
                           nargs='+',
                           help='Labels to train on')
    argparser.add_argument('-a',
                           '--att',
                           dest='att_cls',
                           type=str,
                           default="pitch",
                           help='Labels to train on')
    args = argparser.parse_args()

    output_path = mkdir_in_path(args.output, 'inception_models')
    output_file = join(
        output_path, f"{args.name}_{datetime.now().strftime('%Y-%m-%d')}.pt")

    train_inception_model(output_file,
                          dbsize=args.dbsize,
                          batch_size=args.batch_size,
                          labels=args.labels,
                          att_cls=args.att_cls)
Code example #13
File: spectset2snd.py  Project: lonce/sonyGanFork
def generate(parser):
    args = parser.parse_args()

    argsObj = vars(args)
    print(f"generate args: {argsObj}")

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(
        **config['transformConfig']).get_postprocessor()
    #### I WANT TO ADD NORMALIZATION HERE  ######################
    print(f"postprocess: {postprocess}")

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "2D_spectset")
    output_dir = mkdir_in_path(output_dir,
                               datetime.now().strftime('%Y-%m-%d_%H.%M'))

    gen_batch, latents = torch.load(argsObj["gen_batch"])

    interp_steps0 = int(argsObj["d0"])
    interp_steps0norm = interp_steps0 - 1  # because the batch generator will spread the steps out to include both endpoints

    interp_steps1 = int(argsObj["d1"])
    interp_steps1norm = interp_steps1 - 1  # because the batch generator will spread the steps out to include both endpoints

    usePM = argsObj["pm"]

    g = list(gen_batch)

    assert interp_steps0 * interp_steps1 == len(
        g
    ), f"product of d0, d1 interpolation steps({interp_steps0},{interp_steps1}) != batch length ({len(g)})"

    audio_out = map(postprocess, gen_batch)

    if not usePM:  #then just output as usual, including option to write latents if provided
        saveAudioBatch(audio_out,
                       path=output_dir,
                       basename='test_2D4pt',
                       sr=config["transformConfig"]["sample_rate"],
                       latents=latents)

    else:  # save paramManager files, (and don't write latents separately)
        data = list(audio_out)  #LW it was a map, make it a list
        zdata = zip(
            data,
            latents)  #zip so we can enumerate through pairs of data/latents

        vstep = -1  # gets incremented in loop

        #d1nvar=argsObj["d1nvar"]
        d1nvar = 1  # no variations for this spectset generation

        rowlength = interp_steps0 * d1nvar
        print(f'rowlength is {rowlength}')

        for k, (audio, params) in enumerate(zdata):
            istep = int(
                k / rowlength
            )  #the outer counter, orthogonal to the two lines defining the submanifold

            j = k % rowlength
            jstep = int(j / d1nvar)
            vstep = (vstep + 1) % d1nvar

            #print(f'doing row {istep}, col {jstep}, and variation {vstep}')

            if type(audio) != np.ndarray:
                audio = np.array(audio, float)

            path = output_dir
            basename = 'test_spectset2snd'
            sr = config["transformConfig"]["sample_rate"]

            #foo=f'{basename}_{jstep}_{vstep}.wav'

            out_path = os.path.join(
                path, f'{basename}_d1.{istep}_d0.{jstep}_v.{vstep}.wav')
            # paramManager, create
            pm = paramManager.paramManager(
                out_path, output_dir
            )  ##-----------   paramManager  interface ------------------##
            #param_out_path = os.path.join(path, f'{basename}_{i}.params')
            pm.initParamFiles(overwrite=True)

            if not os.path.exists(out_path):
                #write_wav(out_path, audio.astype(float), sr)
                sf.write(out_path, audio.astype(float), sr)

                duration = len(audio.astype(float)) / float(sr)
                #print(f"duration is {duration}")
                if latents is not None:
                    #pm.addParam(out_path, "dim1", [0.0,duration], [(p-minpitch)/pitchrange,(p-minpitch)/pitchrange], units="norm, midip in[58,70]", nvals=0, minval='null', maxval='null')
                    pm.addParam(
                        out_path,
                        "dim0", [0.0, duration],
                        [jstep / interp_steps0norm, jstep / interp_steps0norm],
                        units=f'norm, interp steps in[0,{interp_steps0}]',
                        nvals=interp_steps0,
                        minval='null',
                        maxval='null')
                    if interp_steps1norm > 0:  #else just doing 1D interpolation
                        pm.addParam(
                            out_path,
                            "dim1", [0.0, duration], [
                                istep / interp_steps1norm,
                                istep / interp_steps1norm
                            ],
                            units=f'norm, interp steps in[0,{interp_steps1}]',
                            nvals=interp_steps1,
                            minval='null',
                            maxval='null')

                    segments = 11  # to include a full segment for each value including endpoints
                    envTimes, envVals = makesteps(
                        np.linspace(0, duration, segments + 1, True),
                        np.linspace(
                            0, 1, segments,
                            True))  #need one extra time to flank each value
                    pm.addParam(out_path,
                                "envPt",
                                envTimes,
                                envVals,
                                units=f"norm, duration in[0,{duration}]",
                                nvals=0,
                                minval='null',
                                maxval='null')

                    # write paramfile
                    #torch.save(params, param_out_path)
                    #np.savetxt(txt_param_out_path, params.cpu().numpy())
            else:
                print(f"saveAudioBatch: File {out_path} exists. Skipping...")
                continue

    print(f"GRID data output path/pattern: {out_path}\n")
Code example #14
def generate(parser):
    args = parser.parse_args()
    kwargs = vars(args)
    model, config, model_name = load_model_checkp(**kwargs)
    latentDim = model.config.categoryVectorDim_G
    overlap = kwargs.get('overlap', 0.77)
    batch_size = kwargs.get('batch_size', 50)

    # We load a dummy data loader for post-processing
    model_postpro = AudioPreprocessor(**config['transformConfig']).get_postprocessor()

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "from_midi")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))
    
    
    overlap_index = int(config['transformConfig']['audio_length']*overlap)

    print("Loading MIDI file")
    
    midi_file = MidiFile(args.midi)
    midi_name = os.path.basename(args.midi).split('.')[0]
    pitch_list = []
    pitch_range = config['loaderConfig']['pitch_range']
    pitch_cls_list = list(range(pitch_range[0], pitch_range[1] + 1))
    
    for i, track in enumerate(midi_file.tracks):
        for msg in track:

            if msg.type == "note_on":

                if msg.note in pitch_cls_list:
                    pitch_list.append(
                        pitch_cls_list.index(msg.note))
                else:
                    if msg.note > max(pitch_cls_list):
                        if msg.note - 12 in pitch_cls_list:
                            pitch_list.append(
                                pitch_cls_list.index(msg.note - 12))
                    if msg.note < min(pitch_cls_list):
                        if msg.note + 12 in pitch_cls_list:
                            pitch_list.append(
                                pitch_cls_list.index(msg.note + 12))

    output_audio = np.array([])
    pbar = trange(int(np.ceil(len(pitch_list)/batch_size)), desc="fake data IS loop")
    input_z, _ = model.buildNoiseData(batch_size, None, skipAtts=True)
    
    z = input_z[:, :-len(pitch_cls_list)].clone()
    z = interpolate_batch(z[0], z[1], steps=batch_size)
    n_interp = z.size(0)
    alpha = 0
    k = 0
    
    for j in pbar:
        input_labels = torch.LongTensor(pitch_list[j*batch_size: batch_size*(j+1)])
        input_z, _ = model.buildNoiseData(len(input_labels), inputLabels=input_labels.reshape(-1, 1), skipAtts=True)
        z_target = input_z[0, :-len(pitch_cls_list)].clone()
        input_z[:, :-len(pitch_cls_list)] = z.clone()[:len(input_labels)]
        gen_batch = model.test(input_z, getAvG=True)
        gen_raw = map(model_postpro, gen_batch)
        gen_raw = map(lambda x: np.array(x).astype(float), gen_raw)
        z = interpolate_batch(z[-1], z_target, batch_size)
        for i, g in enumerate(gen_raw):
            if i==0 and j == 0:
                output_audio = g
            else:
                output_audio = np.concatenate([output_audio, np.zeros(len(g) - overlap_index)])
                output_audio[-len(g):] += g
    # output_audio /= max(output_audio)
    # output_audio[output_audio > 1] = 1
    output_audio /= max(output_audio)
    write_wav(f'{output_dir}/{midi_name}_{datetime.today().strftime("%Y_%m_%d_%H")}.wav', output_audio, 16000)
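
Note: the note-by-note concatenation at the end of this snippet is an overlap-add; each generated note is appended so that its first overlap_index samples are summed into the tail of the running buffer. A minimal standalone sketch of the same idea (overlap_add is a hypothetical name, not a project function):

import numpy as np

def overlap_add(chunks, overlap):
    # Append each chunk so that its first `overlap` samples are summed
    # into the tail of the running buffer, as in the loop above.
    out = np.array(chunks[0], dtype=float)
    for g in chunks[1:]:
        out = np.concatenate([out, np.zeros(len(g) - overlap)])
        out[-len(g):] += g
    return out

# Two 4-sample notes with a 2-sample overlap give 6 output samples:
# overlap_add([np.ones(4), np.ones(4)], overlap=2) -> [1, 1, 2, 2, 1, 1]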
Code example #15
def train_inception_model(name: str,
                          path: str,
                          labels: list,
                          config: str,
                          batch_size: int = 50,
                          n_epoch=100):

    output_path = mkdir_in_path(path, 'inception_models')
    output_file = join(output_path,
                       f"{name}_{datetime.now().strftime('%Y-%m-%d')}.pt")
    output_log = join(output_path,
                      f"{name}_{datetime.now().strftime('%Y-%m-%d')}.log")
    logging.basicConfig(filename=output_log, level=logging.INFO)

    assert os.path.exists(config), f"Path to config {config} does not exist"
    config = read_json(config)

    loader_config = config['loader_config']
    transform_config = config['transform_config']
    transform = transform_config['transform']
    dbname = loader_config.pop('dbname')
    loader_module = get_data_loader(dbname)
    processor = AudioProcessor(**transform_config)
    loader = loader_module(name=dbname + '_' + transform,
                           preprocessing=processor,
                           **loader_config)

    mel = MelScale(sample_rate=transform_config['sample_rate'],
                   fft_size=transform_config['fft_size'],
                   n_mel=transform_config.get('n_mel', 256),
                   rm_dc=True)

    val_data, val_labels = loader.get_validation_set()
    val_data = val_data[:, 0:1]

    att_dict = loader.header['attributes']
    att_classes = att_dict.keys()

    num_classes = sum(len(att_dict[k]['values']) for k in att_classes)

    data_loader = DataLoader(loader,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=2)

    device = "cuda" if GPU_is_available() else "cpu"

    inception_model = nn.DataParallel(
        SpectrogramInception3(num_classes, aux_logits=False))
    inception_model.to(device)

    optim = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    inception_model.parameters()),
                             betas=[0, 0.99],
                             lr=0.001)

    criterion = ACGANCriterion(att_dict)
    epochs = trange(n_epoch, desc='train-loop')

    for i in epochs:
        data_iter = iter(data_loader)
        iter_bar = trange(len(data_iter), desc='epoch-loop')
        inception_model.train()
        for j in iter_bar:
            input, target = next(data_iter)
            input.requires_grad = True
            input.to(device)

            # take magnitude
            input = mel(input.float())

            mag_input = F.interpolate(input[:, 0:1], (299, 299))
            optim.zero_grad()

            output = inception_model(mag_input.float())
            loss = criterion.getCriterion(output, target.to(device))

            loss.backward()
            state_msg = f'Iter: {j}; loss: {loss.item():0.2f} '
            iter_bar.set_description(state_msg)
            optim.step()

        # SAVE CHECK-POINT
        if i % 10 == 0:
            if isinstance(inception_model, torch.nn.DataParallel):
                torch.save(inception_model.module.state_dict(), output_file)
            else:
                torch.save(inception_model.state_dict(), output_file)

        # EVALUATION
        with torch.no_grad():
            inception_model.eval()

            val_i = int(np.ceil(len(val_data) / batch_size))
            vloss = 0
            prec = 0
            y_pred = []
            y_true = []
            prec = {k: 0 for k in att_classes}

            for k in range(val_i):
                vlabels = val_labels[k * batch_size:batch_size * (k + 1)]
                vdata = val_data[k * batch_size:batch_size * (k + 1)]
                vdata = mel(vdata.float())
                vdata = F.interpolate(vdata, (299, 299))

                vpred = inception_model(vdata.to(device))
                vloss += criterion.getCriterion(vpred,
                                                vlabels.to(device)).item()
                vlabels_pred, _ = criterion.getPredictionLabels(vpred)
                y_pred.append(vlabels_pred)
                # y_true += list(vlabels)

            y_pred = torch.cat(y_pred)

            pred_labels = loader.index_to_labels(y_pred)
            true_labels = loader.index_to_labels(val_labels)
            for att_i, c in enumerate(att_classes):

                # skip attributes trained with an MSE loss; the confusion matrix
                # and classification report only apply to cross-entropy attributes
                if att_dict[c]['loss'] == 'mse': continue
                logging.info(c)
                pred = [l[att_i] for l in pred_labels]
                true = [l[att_i] for l in true_labels]
                cm = confusion_matrix(true, pred, labels=att_dict[c]['values'])
                print("")
                print("Confusion Matrix")
                print(cm)
                logging.info(cm)
                print("")
                target_names = [str(v) for v in att_dict[c]['values']]
                crep = classification_report(true,
                                             pred,
                                             target_names=target_names,
                                             labels=target_names)
                logging.info(crep)
                print(crep)
            state_msg2 = f'epoch {i}; val_loss: {vloss / val_i: 0.2f}'
            logging.info(state_msg2)
            epochs.set_description(state_msg2)
Code example #16
    # configuration file path
    config_file_path = kwargs.get("configPath", None)
    config = load_config_file(config_file_path)

    ########### MODEL CONFIG ############
    model_config = config["modelConfig"]
    for item, val in configOverride.items():
        model_config[item] = val

    ########### DATA CONFIG #############
    for item, val in configOverride.items():
        data_config[item] = val

    exp_name = config.get("name", "default")
    checkPointDir = config["output_path"]
    checkPointDir = mkdir_in_path(checkPointDir, exp_name)
    # config["output_path"] = checkPointDir

    # LOAD CHECKPOINT
    print("Search and load last checkpoint")
    checkPointData = getLastCheckPoint(checkPointDir, exp_name)
    nSamples = kwargs['n_samples']

    # CONFIG DATA MANAGER
    print("Data manager configuration")
    data_manager = AudioPreprocessor(**config['transformConfig'])

    data_loader = NSynthLoader(dbname=f"NSynth_{data_manager.transform}",
                               output_path=checkPointDir,
                               preprocessing=data_manager.get_preprocessor(),
                               **config['loaderConfig'])
Code example #17
def generate(parser):
    args = parser.parse_args()

    argsObj=vars(args)
    print(f"generate args: {argsObj}")

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()
    #### I WANT TO ADD NORMALIZATION HERE  ######################
    print(f"postprocess: {postprocess}")

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "2D")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))

    # Create evaluation manager
    eval_manager = StyleGEvaluationManager(model, n_gen=2)

    z0 = torch.load(argsObj["z0"])
    z1 = torch.load(argsObj["z1"])

    minpitch = int(argsObj["p0"])
    maxpitch = int(argsObj["p1"])
    pitchrange = maxpitch - minpitch
    if pitchrange < 1:
        pitchrange = 1

    interp_steps1 = int(argsObj["d1"])
    interp_steps1norm = interp_steps1 - 1  # because the batch generator will spread the steps out to include both endpoints

    usePM = argsObj["pm"]
    print(f"interp_steps1 is {interp_steps1}, and usePM (use ParamManager) is {usePM}")

    for p in range(minpitch, maxpitch + 1):

        #######   ---- with conditioned pitch
        # linear
        #gen_batch, latents = eval_manager.test_single_pitch_latent_interpolation(p_val=p, z0=z0, z1=z1, steps=10)
        # spherical
        #gen_batch, latents = eval_manager.qslerp(pitch=p, z0=z0, z1=z1, steps=10)
        # staggered
        gen_batch, latents = eval_manager.test_single_pitch_latent_staggered_interpolation(
            p_val=p, z0=z0, z1=z1, steps=interp_steps1,
            d1nvar=argsObj["d1nvar"], d1var=argsObj["d1var"])

        audio_out = map(postprocess, gen_batch)

        if not usePM:  # then just output as usual, including option to write latents if provided
            saveAudioBatch(audio_out,
                           path=output_dir,
                           basename='test_pitch_sweep' + "_" + str(p),
                           sr=config["transformConfig"]["sample_rate"],
                           latents=latents)

        else:  # save paramManager files (and don't write latents separately)
            data = list(audio_out)  # LW: it was a map, make it a list
            zdata = zip(data, latents)  # zip so we can enumerate through pairs of data/latents

            istep = 0
            vstep = 0

            for i, (audio, params) in enumerate(zdata):

                istep = int(i / argsObj["d1nvar"])
                vstep = (vstep + 1) % argsObj["d1nvar"]

                if not isinstance(audio, np.ndarray):
                    audio = np.array(audio, float)

                path = output_dir
                basename = 'test_pitch_sweep' + "_" + str(p)
                sr = config["transformConfig"]["sample_rate"]

                #foo=f'{basename}_{istep}_{vstep}.wav'

                out_path = os.path.join(path, f'{basename}_{istep}_{vstep}.wav')
                # paramManager, create
                pm = paramManager.paramManager(out_path, output_dir)  ##-----------   paramManager  interface ------------------##
                #param_out_path = os.path.join(path, f'{basename}_{i}.params')
                pm.initParamFiles(overwrite=True)


                if not os.path.exists(out_path):
                    #write_wav(out_path, audio.astype(float), sr)
                    sf.write(out_path, audio.astype(float), sr)

                    duration = len(audio.astype(float)) / float(sr)
                    #print(f"duration is {duration}")
                    if latents is not None:
                        pm.addParam(out_path, "pitch", [0.0, duration],
                                    [(p - minpitch) / pitchrange, (p - minpitch) / pitchrange],
                                    units="norm, midip in[58,70]", nvals=0, minval='null', maxval='null')
                        pm.addParam(out_path, "instID", [0.0, duration],
                                    [istep / interp_steps1norm, istep / interp_steps1norm],
                                    units="norm, interp steps in[0,10]", nvals=10, minval='null', maxval='null')
                        #pm.addParam(out_path, "envPt", [0.0,duration], [0,1.0], units=f"norm, duration in[0,{duration}]", nvals=0, minval='null', maxval='null')

                        segments = 11  # to include a full segment for each value including endpoints
                        envTimes, envVals = makesteps(np.linspace(0, duration, segments + 1, True),
                                                      np.linspace(0, 1, segments, True))  # need one extra time to flank each value
                        pm.addParam(out_path, "envPt", envTimes, envVals,
                                    units=f"norm, duration in[0,{duration}]", nvals=0, minval='null', maxval='null')

                        # write paramfile 
                        #torch.save(params, param_out_path)
                        #np.savetxt(txt_param_out_path, params.cpu().numpy())
                else:
                    print(f"saveAudioBatch: File {out_path} exists. Skipping...")
                    continue

    print("FINISHED!\n")
Code example #18
File: visualization.py  Project: wgwangang/DrumGAN
    def publish_PCA(self,
                    data,
                    name,
                    total_labels,
                    env="",
                    labels=[],
                    output_dir=None):
        output_dir = mkdir_in_path(output_dir, "pca")
        if not hasattr(self, 'pca_dict'):
            self.pca_dict = {}
        if len(labels) == 0:
            total_labels = ["unlabeled_trace"]
            labels = np.array(len(data) * total_labels)
        if name not in self.pca_dict:
            self.pca_dict[name] = {l: {'x': [], 'y': []} for l in total_labels}
            self.pca_dict[name]['n_steps'] = 0

        data = data.reshape(data.size(0), -1)
        pca_data = PCA(data).numpy()
        win_name = name
        self.update_tokens(win_name)

        if len(total_labels) == 0: return -1
        tr_list = []
        for label in total_labels:

            self.pca_dict[name][label]['x'].append(pca_data[labels == label,
                                                            0])
            self.pca_dict[name][label]['y'].append(pca_data[labels == label,
                                                            1])

            step_list = []
            for step in range(self.pca_dict[name]['n_steps'] + 1):
                if step < len(self.pca_dict[name][label]['x']):
                    step_list.append(
                        go.Scatter(visible=False,
                                   mode='markers',
                                   name=label,
                                   x=self.pca_dict[name][label]['x'][step],
                                   y=self.pca_dict[name][label]['y'][step]))
                else:
                    # if label is not in step add an empty trace
                    step_list.append({})

            step_list[-1]['visible'] = True

            tr_list += step_list

        fig = go.Figure(data=tr_list)
        steps = []

        for i in range(self.pca_dict[name]['n_steps'] + 1):
            step = dict(
                method='restyle',
                args=['visible', [False] * len(fig.data)],
            )
            for j in range(len(total_labels)):
                step['args'][1][i + j *
                                (self.pca_dict[name]['n_steps'] +
                                 1)] = True  # Toggle i'th trace to "visible"
            steps.append(step)

        sliders = [dict(steps=steps)]
        fig.update_layout(title_text=name, sliders=sliders, showlegend=True)
        self.window_tokens[win_name] = vis.plotlyplot(
            fig, env=self.env + env + '_pca', win=self.window_tokens[win_name])
        if output_dir:
            plot(fig,
                 filename=join(output_dir, win_name + '.html'),
                 auto_open=False)

        self.pca_dict[name]['n_steps'] += 1
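
Note: publish_PCA projects the flattened activations with a helper called PCA whose source is not part of this listing. The sketch below is only an assumption of what such a projection might look like (centre the data and keep the top two principal directions obtained from an SVD):

import torch

def PCA(data, n_components=2):
    # Hypothetical sketch: centre the rows and project them onto the
    # leading right-singular vectors, i.e. the top principal directions.
    centered = data - data.mean(dim=0, keepdim=True)
    _, _, v = torch.svd(centered)
    return centered @ v[:, :n_components]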
Code example #19
    def __init__(self,
                 model_name,
                 checkpoint_dir,
                 gpu=True,
                 visualisation=None,
                 loader=None,
                 loss_plot_i=5000,
                 eval_i=5000,
                 save_iter=5000,
                 config=None,
                 pathAttribDict=None,
                 selectedAttributes=None,
                 ignoreAttribs=False,
                 n_samples=10,
                 save_gen=True,
                 vis_manager=None,
                 **kargs):
        r"""
        Args:
            - pathdb (string): path to the directory containing the image
            dataset.
            - gpu (bool): set to True if you want to use the available GPUs
            for the training procedure
            - visualisation (module): if not None, a visualisation module to
            follow the evolution of the training
            - lossIterEvaluation (int): size of the interval on which the
            model's loss will be evaluated
            - saveIter (int): frequency at which at checkpoint should be saved
            (relevant only if modelLabel != None)
            - checkPointDir (string): if not None, directory where the
            checkpoints should be saved
            - modelLabel (string): name of the model
            - config (dictionary): configuration dictionary with all the
            possible options
            - pathAttribDict (string): path to the attribute dictionary giving
                                       the labels of the dataset
            - selectedAttributes (list): if not None, consider only the listed
                                     attributes for labelling
            - imagefolderDataset (bool): set to true if the data are stored in
                                        the fashion of a
                                        torchvision.datasets.ImageFolderDataset
                                        object
            - ignoreAttribs (bool): set to True if the input attrib dict should
                                    only be used as a filter on image's names
            - pathValue (string): partition value
        """

        # Parameters
        if config is None:
            config = {}

        # Load the training configuration
        self.readTrainConfig(config)

        # Checkpoints ?
        assert os.path.exists(checkpoint_dir), f'Checkpoint  dir {checkpoint_dir} does not exist!'
        self.checkPointDir = checkpoint_dir
        self.output_dir = mkdir_in_path(self.checkPointDir, 'output')
        self.modelLabel = model_name
        self.saveIter = save_iter
        self.pathLossLog = None
        self.nSamples = n_samples
        self.save_gen = save_gen
        if self.checkPointDir is not None:
            self.pathLossLog = os.path.abspath(os.path.join(self.checkPointDir,
                                                            self.modelLabel
                                                            + '_losses.pkl'))
            self.pathRefVector = os.path.abspath(os.path.join(self.checkPointDir,
                                                              self.modelLabel
                                                              + '_refVectors.pt'))

        # Initialize the model
        self.useGPU = gpu
        self.device = torch.device('cuda' if self.useGPU else 'cpu')

        if not self.useGPU:
            self.numWorkers = 1

        self.loader = loader

        self.startScale = self.modelConfig.startScale

        # # CONDITIONAL GAN
        if self.modelConfig.ac_gan:
            self.modelConfig.attribKeysOrder = \
                self.loader.get_attribute_dict()
        # Intern state
        self.runningLoss = {}
        
        # self.startScale = 0

        self.startIter = 0
        self.lossProfile = []
        self.epoch = 0
        # print("%d images detected" % int(len(self.getDataset(0, size=10))))
        self.initModel()
        # Loss printing
        self.loss_plot_i = loss_plot_i
        self.eval_i = eval_i

        self.loss_visualizer = \
            LossVisualizer(output_path=self.output_dir,
                           env=self.modelLabel,
                           save_figs=True,
                           no_visdom=vis_manager.no_visdom)

        # init ref eval vectors
        self.init_reference_eval_vectors()
        self.vis_manager = vis_manager
Code example #20
File: visualization.py  Project: wgwangang/DrumGAN
    def __init__(self, metric_name, output_path, **kargs):
        self.metric_dict = {}
        output_path = mkdir_in_path(output_path, metric_name)
        TensorVisualizer.__init__(self, output_path=output_path, **kargs)
Code example #21
File: random.py  Project: wgwangang/DrumGAN
def generate(parser):
    parser.add_argument("--val", dest="val", action='store_true')
    parser.add_argument("--train", dest="train", action='store_true')
    parser.add_argument("--avg-net", dest="avg_net", action='store_true')
    parser.add_argument("--name", dest="name", default="")
    parser.add_argument("--dump-labels",
                        dest="dump_labels",
                        action="store_true")
    args = parser.parse_args()

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    transform_config = config['transform_config']
    loader_config = config['loader_config']
    processor = AudioProcessor(**transform_config)
    postprocess = processor.get_postprocessor()

    # Create output evaluation dir
    if args.val:
        name = args.name + '_val_labels'
    elif args.train:
        name = args.name + '_train_labels'
    else:
        name = args.name + '_rand_labels'
    if args.outdir == "":
        args.outdir = args.dir
    output_dir = mkdir_in_path(args.outdir, f"generation_samples")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "random")
    output_dir = mkdir_in_path(
        output_dir, name + '_' + datetime.now().strftime('%Y-%m-%d_%H_%M'))

    dbname = loader_config['dbname']
    loader = get_data_loader(dbname)(name=dbname + '_' +
                                     transform_config['transform'],
                                     preprocessing=processor,
                                     **loader_config)

    labels = None
    if model.config.ac_gan:
        if args.val:
            val_set = loader.get_validation_set()[1]
            perm = torch.randperm(val_set.size(0))
            idx = perm[:args.n_gen]
            labels = val_set[idx]
        elif args.train:
            labels = torch.Tensor(random.sample(loader.metadata, k=args.n_gen))
        else:
            labels = loader.get_random_labels(args.n_gen)

    z, _ = model.buildNoiseData(args.n_gen, inputLabels=labels, skipAtts=True)
    data_batch = []
    with torch.no_grad():
        for i in range(int(np.ceil(args.n_gen / args.batch_size))):
            data_batch.append(
                model.test(z[i * args.batch_size:args.batch_size * (i + 1)],
                           toCPU=True,
                           getAvG=args.avg_net).cpu())
        data_batch = torch.cat(data_batch, dim=0)
        audio_out = map(postprocess, data_batch)

    saveAudioBatch(audio_out,
                   path=output_dir,
                   basename='sample',
                   sr=config["transform_config"]["sample_rate"])
    if args.dump_labels:
        with open(f"{output_dir}/params_in.txt", "a") as f:
            for i in tqdm(range(args.n_gen), desc='Creating Samples'):
                params = labels[i, :-1].tolist()
                f.writelines([f"{i}, {list(params)}\n"])

    print("FINISHED!\n")
Code example #22
def test(parser):
    parser.add_argument('--size', dest='size', default=1000, type=int)
    parser.add_argument('--gen', dest='gen', action='store_true')
    args = parser.parse_args()
    kargs = vars(args)
    device = get_device()
    model, config, model_name = load_model_checkp(**kargs)

    transform_config = config['transform_config']
    loader_config = config['loader_config']

    d_net = model.getOriginalD().to(device)
    g_net = model.netG.to(device).eval()
    d_net.eval()
    # We load a dummy data loader for post-processing
    processor = AudioProcessor(**transform_config)

    dbname = loader_config['dbname']

    loader_config["criteria"]["size"] = args.size
    loader = get_data_loader(dbname)(name=dbname + '_' +
                                     transform_config['transform'],
                                     preprocessing=processor,
                                     **loader_config)

    att_dict = loader.header['attributes']
    criterion = ACGANCriterion(att_dict)

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"tests_D")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir,
                               datetime.now().strftime('%Y-%m-%d %H:%M'))

    batch_size = min(args.batch_size, len(loader))
    data_loader = DataLoader(loader,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=2)

    data_iter = iter(data_loader)
    iter_bar = trange(len(data_iter), desc='epoch-loop')

    D_loss = []
    data = []
    for j in iter_bar:
        with torch.no_grad():
            input, target = next(data_iter)
            if args.gen:
                z, _ = model.buildNoiseData(target.size(0),
                                            inputLabels=target,
                                            skipAtts=True)
                input = g_net(z)

            pred = d_net(input.float().to(device)).cpu()
            clf_loss = criterion.getCriterion(pred, target.cpu())
            # get D loss
            D_loss.append(pred[:, -1])
            data.append(input.cpu())
            state_msg = f'Iter: {j}; avg D_nloss: {sum(pred[:, -1])/len(pred[:, -1]):0.3f}, classif_loss: {clf_loss:0.3f}'
            iter_bar.set_description(state_msg)
    # Create evaluation manager
    D_loss = torch.cat(D_loss)
    data = torch.cat(data)
    D_loss, idx = abs(D_loss).sort()

    audio_out = loader.postprocess(data[idx[:20]])
    saveAudioBatch(audio_out,
                   path=output_dir,
                   basename='low_W-distance',
                   sr=config["transform_config"]["sample_rate"])
    audio_out = loader.postprocess(data[idx[-20:]])
    saveAudioBatch(audio_out,
                   path=output_dir,
                   basename='high_W-distance',
                   sr=config["transform_config"]["sample_rate"])
    print("FINISHED!\n")
Code example #23
def generate(parser):
    args = parser.parse_args()

    argsObj=vars(args)
    print(f"generate args: {argsObj}")

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()
    #### I WANT TO ADD NORMALIZATION HERE  ######################
    print(f"postprocess: {postprocess}")

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "2D")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))


    # Create evaluation manager
    eval_manager = StyleGEvaluationManager(model, n_gen=2)

    z0 = torch.load(argsObj["z0"])
    z1 = torch.load(argsObj["z1"])
    if argsObj["z2"] is None:
        z2 = 0
    else:
        z2 = torch.load(argsObj["z2"])
    if argsObj["z3"] is None:
        z3 = 0
    else:
        z3 = torch.load(argsObj["z3"])


    interp_steps0 = int(argsObj["d0"])
    interp_steps0norm = interp_steps0 - 1  # because the batch generator will spread the steps out to include both endpoints

    interp_steps1 = int(argsObj["d1"])
    interp_steps1norm = interp_steps1 - 1  # because the batch generator will spread the steps out to include both endpoints

    usePM = argsObj["pm"]
    print(f"interp_steps0 is {interp_steps0}, interp_steps1 is {interp_steps1}, and usePM (use ParamManager) is {usePM}")



    #######   ---- unconditioned
    gen_batch, latents = eval_manager.unconditioned_linear_interpolation(
        line0z0=z0, line0z1=z1, line1z0=z2, line1z1=z3,
        d0steps=interp_steps0, d1steps=interp_steps1,
        d1nvar=argsObj["d1nvar"], d1var=argsObj["d1var"])

    g = list(gen_batch)
    #for k in length

    audio_out = map(postprocess, gen_batch)


    # save the .pt files no matter what since we may want to use them to zoom in, resample, or whatever.
    if not usePM:
        saveAudioBatch(audio_out,
                       path=output_dir,
                       basename='test_2D4pt',
                       sr=config["transformConfig"]["sample_rate"],
                       latents=latents)

    else:  # save paramManager files (and don't write latents separately)
        data = list(audio_out)  # LW: it was a map, make it a list
        zdata = zip(data, latents)  # zip so we can enumerate through pairs of data/latents


        vstep = -1  # gets incremented in loop

        rowlength = interp_steps0 * argsObj["d1nvar"]
        print(f'rowlength is {rowlength}')

        for k, (audio, params) in enumerate(zdata):
            istep = int(k / rowlength)  # the outer counter, orthogonal to the two lines defining the submanifold

            j = k % rowlength
            jstep = int(j / argsObj["d1nvar"])
            vstep = (vstep + 1) % argsObj["d1nvar"]

            print(f'doing row {istep}, col {jstep}, and variation {vstep}')

            if not isinstance(audio, np.ndarray):
                audio = np.array(audio, float)

            path = output_dir
            basename = 'test_2D4pt'
            sr = config["transformConfig"]["sample_rate"]

            #foo=f'{basename}_{jstep}_{vstep}.wav'

            out_path = os.path.join(path, f'{basename}_d1.{istep}_d0.{jstep}_v.{vstep}.wav')
            # paramManager, create
            pm = paramManager.paramManager(out_path, output_dir)  ##-----------   paramManager  interface ------------------##
            #param_out_path = os.path.join(path, f'{basename}_{i}.params')
            pm.initParamFiles(overwrite=True)


            #also save pt files in case we want to use them to create other grids, scales, etc.
            pt_param_out_path = os.path.join(path, f'{basename}_d1.{istep}_d0.{jstep}_v.{vstep}.pt')
            torch.save(params, pt_param_out_path)

            if not os.path.exists(out_path):
                #write_wav(out_path, audio.astype(float), sr)
                sf.write(out_path, audio.astype(float), sr)

                duration = len(audio.astype(float)) / float(sr)
                #print(f"duration is {duration}")
                if latents is not None:
                    #pm.addParam(out_path, "dim1", [0.0,duration], [(p-minpitch)/pitchrange,(p-minpitch)/pitchrange], units="norm, midip in[58,70]", nvals=0, minval='null', maxval='null')
                    pm.addParam(out_path, "dim0", [0.0, duration],
                                [jstep / interp_steps0norm, jstep / interp_steps0norm],
                                units=f'norm, interp steps in[0,{interp_steps0}]',
                                nvals=interp_steps0, minval='null', maxval='null')
                    if interp_steps1norm > 0:  # else just doing 1D interpolation
                        pm.addParam(out_path, "dim1", [0.0, duration],
                                    [istep / interp_steps1norm, istep / interp_steps1norm],
                                    units=f'norm, interp steps in[0,{interp_steps1}]',
                                    nvals=interp_steps1, minval='null', maxval='null')

                    segments = 11  # to include a full segment for each value including endpoints
                    envTimes, envVals = makesteps(np.linspace(0, duration, segments + 1, True),
                                                  np.linspace(0, 1, segments, True))  # need one extra time to flank each value
                    pm.addParam(out_path, "envPt", envTimes, envVals,
                                units=f"norm, duration in[0,{duration}]",
                                nvals=0, minval='null', maxval='null')

                    # write paramfile 
                    #torch.save(params, param_out_path)
                    #np.savetxt(txt_param_out_path, params.cpu().numpy())
            else:
                print(f"saveAudioBatch: File {out_path} exists. Skipping...")
                continue