Code example #1
def generate(parser):
    args = parser.parse_args()

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "interpolation")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))
    
    # Create evaluation manager
    eval_manager = StyleGEvaluationManager(model, n_gen=100)

    gen_batch = eval_manager.test_single_pitch_latent_interpolation()
    output_path = mkdir_in_path(output_dir, f"one_z_pitch_sweep")
    audio_out = map(postprocess, gen_batch)
    saveAudioBatch(audio_out,
                   path=output_path,
                   basename='test_pitch_sweep', 
                   sr=config["transformConfig"]["sample_rate"])
    print("FINISHED!\n")
Code example #2
File: randz_constc.py Project: lonce/sonyGanFork
def generate(parser):
    args = parser.parse_args()

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # check if conditioning attribute is present
    if 'attribute_list' in config['loaderConfig'].keys():
        condition_parameter = config['loaderConfig']['attribute_list'][0]
    else:
        print("There is no conditioning parameter ('attribute_list' is empty). Exiting!")
        exit(0)
    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "randz_constc")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))
    
    # Create evaluation manager
    p_val = 0.1111111111111111 #75
    eval_manager = StyleGEvaluationManager(model, n_gen=20)
    gen_batch, latents = eval_manager.test_single_pitch_random_z(condition_parameter,p_val)
    output_path = mkdir_in_path(output_dir, f"one_z_pitch_sweep_"+str(p_val))
    audio_out = map(postprocess, gen_batch)

    saveAudioBatch(audio_out,
                   path=output_path,
                   basename='test_pitch_sweep', 
                   sr=config["transformConfig"]["sample_rate"],
                   latents=latents)
    print("FINISHED!\n")
Code example #3
    for item, val in configOverride.items():
        data_config[item] = val

    exp_name = config.get("name", "default")
    checkPointDir = config["output_path"]
    checkPointDir = mkdir_in_path(checkPointDir, exp_name)
    # config["output_path"] = checkPointDir

    # LOAD CHECKPOINT
    print("Search and load last checkpoint")
    checkPointData = getLastCheckPoint(checkPointDir, exp_name)
    nSamples = kwargs['n_samples']

    # CONFIG DATA MANAGER
    print("Data manager configuration")
    data_manager = AudioPreprocessor(**config['transformConfig'])

    data_loader = NSynthLoader(dbname=f"NSynth_{data_manager.transform}",
                               output_path=checkPointDir,
                               preprocessing=data_manager.get_preprocessor(),
                               **config['loaderConfig'])

    print(f"Loading data. Found {len(data_loader)} instances")
    model_config['output_shape'] = data_manager.get_output_shape()
    config["modelConfig"] = model_config

    # Save config file
    save_config_file(config,
                     os.path.join(checkPointDir, f'{exp_name}_config.json'))

    GANTrainer = ProgressiveGANTrainer(modelLabel=exp_name,
Code example #4
File: spectset2snd.py Project: lonce/sonyGanFork
def generate(parser):
    args = parser.parse_args()

    argsObj = vars(args)
    print(f"generate args: {argsObj}")

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(
        **config['transformConfig']).get_postprocessor()
    #### I WANT TO ADD NORMALIZATION HERE  ######################
    print(f"postprocess: {postprocess}")

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "2D_spectset")
    output_dir = mkdir_in_path(output_dir,
                               datetime.now().strftime('%Y-%m-%d_%H.%M'))

    gen_batch, latents = torch.load(argsObj["gen_batch"])

    interp_steps0 = int(argsObj["d0"])
    interp_steps0norm = interp_steps0 - 1  # because the batch generator will spread the steps out to include both endpoints

    interp_steps1 = int(argsObj["d1"])
    interp_steps1norm = interp_steps1 - 1  # because the batch generator will spread the steps out to include both endpoints

    usePM = argsObj["pm"]

    g = list(gen_batch)

    assert interp_steps0 * interp_steps1 == len(
        g
    ), f"product of d0, d1 interpolation steps({interp_steps0},{interp_steps1}) != batch length ({len(g)})"

    audio_out = map(postprocess, gen_batch)

    if not usePM:  #then just output as usual, including option to write latents if provided
        saveAudioBatch(audio_out,
                       path=output_dir,
                       basename='test_2D4pt',
                       sr=config["transformConfig"]["sample_rate"],
                       latents=latents)

    else:  # save paramManager files, (and don't write latents separately)
        data = list(audio_out)  #LW it was a map, make it a list
        zdata = zip(
            data,
            latents)  #zip so we can enumerate through pairs of data/latents

        vstep = -1  # gets incremented in loop

        #d1nvar=argsObj["d1nvar"]
        d1nvar = 1  # no variations for this spectset generation

        rowlength = interp_steps0 * d1nvar
        print(f'rowlength is {rowlength}')

        for k, (audio, params) in enumerate(zdata):
            istep = int(
                k / rowlength
            )  #the outer counter, orthogonal to the two lines defining the submanifold

            j = k % rowlength
            jstep = int(j / d1nvar)
            vstep = (vstep + 1) % d1nvar

            #print(f'doing row {istep}, col {jstep}, and variation {vstep}')

            if type(audio) != np.ndarray:
                audio = np.array(audio, float)

            path = output_dir
            basename = 'test_spectset2snd'
            sr = config["transformConfig"]["sample_rate"]

            #foo=f'{basename}_{jstep}_{vstep}.wav'

            out_path = os.path.join(
                path, f'{basename}_d1.{istep}_d0.{jstep}_v.{vstep}.wav')
            # paramManager, create
            pm = paramManager.paramManager(
                out_path, output_dir
            )  ##-----------   paramManager  interface ------------------##
            #param_out_path = os.path.join(path, f'{basename}_{i}.params')
            pm.initParamFiles(overwrite=True)

            if not os.path.exists(out_path):
                #write_wav(out_path, audio.astype(float), sr)
                sf.write(out_path, audio.astype(float), sr)

                duration = len(audio.astype(float)) / float(sr)
                #print(f"duration is {duration}")
                if latents is not None:
                    #pm.addParam(out_path, "dim1", [0.0,duration], [(p-minpitch)/pitchrange,(p-minpitch)/pitchrange], units="norm, midip in[58,70]", nvals=0, minval='null', maxval='null')
                    pm.addParam(
                        out_path,
                        "dim0", [0.0, duration],
                        [jstep / interp_steps0norm, jstep / interp_steps0norm],
                        units=f'norm, interp steps in[0,{interp_steps0}]',
                        nvals=interp_steps0,
                        minval='null',
                        maxval='null')
                    if interp_steps1norm > 0:  #else just doing 1D interpolation
                        pm.addParam(
                            out_path,
                            "dim1", [0.0, duration], [
                                istep / interp_steps1norm,
                                istep / interp_steps1norm
                            ],
                            units=f'norm, interp steps in[0,{interp_steps1}]',
                            nvals=interp_steps1,
                            minval='null',
                            maxval='null')

                    segments = 11  # to include a full segment for each value including endpoints
                    envTimes, envVals = makesteps(
                        np.linspace(0, duration, segments + 1, True),
                        np.linspace(
                            0, 1, segments,
                            True))  #need one extra time to flank each value
                    pm.addParam(out_path,
                                "envPt",
                                envTimes,
                                envVals,
                                units=f"norm, duration in[0,{duration}]",
                                nvals=0,
                                minval='null',
                                maxval='null')

                    # write paramfile
                    #torch.save(params, param_out_path)
                    #np.savetxt(txt_param_out_path, params.cpu().numpy())
            else:
                print(f"saveAudioBatch: File {out_path} exists. Skipping...")
                continue

    print(f"GRID data output path/pattern: {out_path}\n")
Code example #5
def test(parser):
    args = parser.parse_args()

    kwargs = vars(args)
    nsynth_path = kwargs.get('data_path')
    batch_size = kwargs.get('batch_size', 50)
    is_samples = kwargs.get('is_samples', 5000)

    gen_files = list(list_files_abs_path(args.fake_path, 'wav'))
    n_samples = len(gen_files)
    is_samples = min(n_samples, is_samples)

    if args.inception_model is None:
        args.inception_model = DEFAULT_INSTRUMENT_INCEPTION_MODEL

    print(f"Loading inception model: {args.inception_model}")
    device = 'cuda' if GPU_is_available() else 'cpu'

    state_dict = torch.load(args.inception_model, map_location=device)

    output_path = os.path.join(args.dir, "evaluation_metrics")
    checkexists_mkdir(output_path)

    inception_cls = SpectrogramInception3(state_dict['fc.weight'].shape[0],
                                          aux_logits=False)
    inception_cls.load_state_dict(state_dict)

    nsynth_prepro = AudioPreprocessor(
        **DEFAULT_INCEPTION_PREPROCESSING_CONFIG).get_preprocessor()
    inception_score = []
    print(
        "Computing inception score on true data...\nYou can skip this with ctrl+c"
    )
    try:
        pbar = trange(int(n_samples / is_samples), desc="real data IS loop")
        for j in pbar:
            processed_real = list(
                map(nsynth_prepro,
                    gen_files[j * is_samples:is_samples * (j + 1)]))
            is_maker = InceptionScore(inception_cls)
            is_data = torch.stack(processed_real, dim=0)
            is_data = is_data[:, 0:1]

            for i in range(int(np.ceil(is_samples / batch_size))):

                fake_data = F.interpolate(
                    is_data[i * batch_size:batch_size * (i + 1)], (299, 299))
                is_maker.updateWithMiniBatch(fake_data)
                inception_score.append(is_maker.getScore())

            IS_mean = np.mean(inception_score)
            IS_std = np.std(inception_score)
            pbar.set_description("IIS = {0:.4f} +- {1:.4f}".format(
                IS_mean, IS_std / 2.))
        output_file = f'{output_path}/IIS_{str(n_samples)}_{datetime.now().strftime("%d-%m-%y")}.txt'
        with open(output_file, 'w') as f:
            f.write(str(IS_mean) + '\n')
            f.write(str(IS_std))
    except KeyboardInterrupt as k:
        print("Skipping true data inception score")
Code example #6
File: ikid.py Project: lonce/sonyGanFork
def test(parser, visualisation=None):
    args = parser.parse_args()

    kwargs = vars(args)
    nsynth_path = kwargs.get('data_path')
    att = kwargs.get('att_name', 'pitch')
    batch_size = kwargs.get('batch_size', 50)
    is_samples = kwargs.get('is_samples', 5000)

    true_files = list_files_abs_path(args.true_path, 'wav')
    fake_files = list_files_abs_path(args.fake_path, 'wav')

    n_samples = min(len(true_files), len(fake_files))
    is_samples = min(n_samples, is_samples)

    if args.inception_model is None:
        args.inception_model = DEFAULT_INSTRUMENT_INCEPTION_MODEL

    print(f"Loading inception model: {args.inception_model}")
    device = 'cuda' if GPU_is_available() else 'cpu'

    state_dict = torch.load(args.inception_model, map_location=device)

    output_path = args.dir
    output_path = mkdir_in_path(output_path, "evaluation_metrics")
    output_path = mkdir_in_path(output_path, "ikid")

    inception_cls = SpectrogramInception3(state_dict['fc.weight'].shape[0],
                                          aux_logits=False)
    inception_cls.load_state_dict(state_dict)
    inception_prepro = AudioPreprocessor(
        **DEFAULT_INCEPTION_PREPROCESSING_CONFIG).get_preprocessor()
    inception_score = []

    pbar = trange(int(np.ceil(n_samples / is_samples)), desc="Main loop")
    mmd_distance = []
    for j in pbar:
        real_batch = true_files[j * is_samples:is_samples * (j + 1)]
        fake_batch = fake_files[j * is_samples:is_samples * (j + 1)]
        real_logits = []
        fake_logits = []

        for i in trange(int(np.ceil(len(real_batch) / batch_size)),
                        desc='Computing IKID on batch...'):
            real_input = map(inception_prepro,
                             real_batch[i * batch_size:batch_size * (i + 1)])
            real_input = torch.stack(list(real_input), dim=0)
            real_input = real_input[:, 0:1]
            real_input = F.interpolate(real_input, (299, 299))

            fake_input = map(inception_prepro,
                             fake_batch[i * batch_size:batch_size * (i + 1)])
            fake_input = torch.stack(list(fake_input), dim=0)
            fake_input = fake_input[:, 0:1]
            fake_input = F.interpolate(fake_input, (299, 299))

            real_logits.append(inception_cls(real_input).detach())
            fake_logits.append(inception_cls(fake_input).detach())

        real_logits = torch.cat(real_logits, dim=0)
        fake_logits = torch.cat(fake_logits, dim=0)

        mmd_distance.append(mmd(real_logits, fake_logits))
        mean_MMD = np.mean(mmd_distance)
        var_MMD = np.std(mmd_distance)
        pbar.set_description("IKID = {0:.4f} +- {1:.4f}".format(
            mean_MMD, var_MMD))
    output_file = f'{output_path}/IKID_{datetime.now().strftime("%y_%m_%d")}.txt'
    with open(output_file, 'w') as f:
        f.write(str(mean_MMD) + '\n')
        f.write(str(var_MMD))
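The mmd(real_logits, fake_logits) helper is not shown in this snippet; KID is conventionally an unbiased estimate of the squared MMD between inception features under a degree-3 polynomial kernel. A minimal sketch under that assumption (names and defaults are illustrative):

import torch

def polynomial_mmd2(x, y, degree=3, coef0=1.0):
    # k(a, b) = (a.b / d + coef0) ** degree, with d the feature dimension
    d = x.size(1)
    kxx = (x @ x.t() / d + coef0) ** degree
    kyy = (y @ y.t() / d + coef0) ** degree
    kxy = (x @ y.t() / d + coef0) ** degree
    m, n = x.size(0), y.size(0)
    # unbiased estimate: exclude the diagonals of the within-set kernel matrices
    return ((kxx.sum() - kxx.diag().sum()) / (m * (m - 1))
            + (kyy.sum() - kyy.diag().sum()) / (n * (n - 1))
            - 2 * kxy.mean())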
Code example #7
def generate(parser):
    args = parser.parse_args()
    kwargs = vars(args)
    model, config, model_name = load_model_checkp(**kwargs)
    latentDim = model.config.categoryVectorDim_G
    overlap = kwargs.get('overlap', 0.77)
    batch_size = kwargs.get('batch_size', 50)

    # We load a dummy data loader for post-processing
    model_postpro = AudioPreprocessor(**config['transformConfig']).get_postprocessor()

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "from_midi")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))
    
    
    overlap_index = int(config['transformConfig']['audio_length']*overlap)

    print("Loading MIDI file")
    
    midi_file = MidiFile(args.midi)
    midi_name = os.path.basename(args.midi).split('.')[0]
    pitch_list = []
    pitch_range = config['loaderConfig']['pitch_range']
    pitch_cls_list = list(range(pitch_range[0], pitch_range[1] + 1))
    
    for i, track in enumerate(midi_file.tracks):
        for msg in track:

            if msg.type == "note_on":

                if msg.note in pitch_cls_list:
                    pitch_list.append(
                        pitch_cls_list.index(msg.note))
                else:
                    if msg.note > max(pitch_cls_list):
                        if msg.note - 12 in pitch_cls_list:
                            pitch_list.append(
                                pitch_cls_list.index(msg.note - 12))
                    if msg.note < min(pitch_cls_list):
                        if msg.note + 12 in pitch_cls_list:
                            pitch_list.append(
                                pitch_cls_list.index(msg.note + 12))

    output_audio = np.array([])
    pbar = trange(int(np.ceil(len(pitch_list)/batch_size)), desc="MIDI note batch loop")
    input_z, _ = model.buildNoiseData(batch_size, None, skipAtts=True)
    
    z = input_z[:, :-len(pitch_cls_list)].clone()
    z = interpolate_batch(z[0], z[1], steps=batch_size)
    n_interp = z.size(0)
    alpha = 0
    k = 0
    
    for j in pbar:
        input_labels = torch.LongTensor(pitch_list[j*batch_size: batch_size*(j+1)])
        input_z, _ = model.buildNoiseData(len(input_labels), inputLabels=input_labels.reshape(-1, 1), skipAtts=True)
        z_target = input_z[0, :-len(pitch_cls_list)].clone()
        input_z[:, :-len(pitch_cls_list)] = z.clone()[:len(input_labels)]
        gen_batch = model.test(input_z, getAvG=True)
        gen_raw = map(model_postpro, gen_batch)
        gen_raw = map(lambda x: np.array(x).astype(float), gen_raw)
        z = interpolate_batch(z[-1], z_target, batch_size)
        for i, g in enumerate(gen_raw):
            if i==0 and j == 0:
                output_audio = g
            else:
                output_audio = np.concatenate([output_audio, np.zeros(len(g) - overlap_index)])
                output_audio[-len(g):] += g
    # output_audio /= max(output_audio)
    # output_audio[output_audio > 1] = 1
    output_audio /= max(output_audio)
    write_wav(f'{output_dir}/{midi_name}_{datetime.today().strftime("%Y_%m_%d_%H")}.wav', output_audio, 16000)
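interpolate_batch(z[0], z[1], steps=batch_size) above glides the non-pitch part of the latent from one random draw toward the next while successive MIDI notes are generated. A minimal sketch of such a helper, assuming plain linear interpolation that includes both endpoints (illustrative, not the repository's implementation):

import torch

def interpolate_batch(z_start, z_end, steps):
    # Hypothetical sketch: `steps` latents on the straight line from z_start
    # to z_end, endpoints included.
    alphas = torch.linspace(0.0, 1.0, steps).unsqueeze(1)   # (steps, 1)
    return (1 - alphas) * z_start.unsqueeze(0) + alphas * z_end.unsqueeze(0)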
Code example #8
def generate(parser):
    args = parser.parse_args()

    argsObj=vars(args)
    print(f"generate args: {argsObj}")

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()
    #### I WANT TO ADD NORMALIZATION HERE  ######################
    print(f"postprocess: {postprocess}")

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "2D")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))

    # Create evaluation manager
    eval_manager = StyleGEvaluationManager(model, n_gen=2)

    z0=torch.load(argsObj["z0"])
    z1=torch.load(argsObj["z1"])

    minpitch=int(argsObj["p0"])
    maxpitch=int(argsObj["p1"])
    pitchrange=maxpitch-minpitch
    if pitchrange < 1 : 
        pitchrange=1

    interp_steps1=int(argsObj["d1"])
    interp_steps1norm=interp_steps1 -1 # because the batch generator will spread the steps out to include both endpoints

    usePM=argsObj["pm"]
    print(f"interp_steps1 is {interp_steps1}, and usePM (use ParamManager) is {usePM}")

    for p in range(minpitch, maxpitch+1) :

        #######   ---- with conditioned pitch
        # linear
        #gen_batch, latents = eval_manager.test_single_pitch_latent_interpolation(p_val=p, z0=z0, z1=z1, steps=10)
        #spherical
        #gen_batch, latents = eval_manager.qslerp(pitch=p, z0=z0, z1=z1, steps=10)
        #staggered
        gen_batch, latents = eval_manager.test_single_pitch_latent_staggered_interpolation(p_val=p, z0=z0, z1=z1, steps=interp_steps1, d1nvar=argsObj["d1nvar"], d1var=argsObj["d1var"])

        audio_out = map(postprocess, gen_batch)

        if not usePM :  #then just output as usual, including option to write latents if provided
            saveAudioBatch(audio_out,
                           path=output_dir,
                           basename='test_pitch_sweep'+ "_"+str(p), 
                           sr=config["transformConfig"]["sample_rate"],
                           latents=latents)

        else:                       # save paramManager files, (and don't write latents separately)
            data=list(audio_out) #LW it was a map, make it a list
            zdata=zip(data,latents) #zip so we can enumerate through pairs of data/latents

            istep=0
            vstep=0

            for i, (audio, params) in enumerate(zdata) :

                istep=int(i/argsObj["d1nvar"])
                vstep=(vstep+1)%argsObj["d1nvar"]

                if type(audio) != np.ndarray:
                    audio = np.array(audio, float)

                path=output_dir
                basename='test_pitch_sweep'+ "_"+str(p) 
                sr=config["transformConfig"]["sample_rate"]

                #foo=f'{basename}_{istep}_{vstep}.wav'

                out_path = os.path.join(path, f'{basename}_{istep}_{vstep}.wav')
                # paramManager, create 
                pm=paramManager.paramManager(out_path, output_dir)  ##-----------   paramManager  interface ------------------##
                #param_out_path = os.path.join(path, f'{basename}_{i}.params')
                pm.initParamFiles(overwrite=True)


                if not os.path.exists(out_path):
                    #write_wav(out_path, audio.astype(float), sr)
                    sf.write(out_path, audio.astype(float), sr)

                    duration=len(audio.astype(float))/float(sr)
                    #print(f"duration is {duration}")
                    if latents is not None:
                        pm.addParam(out_path, "pitch", [0.0,duration], [(p-minpitch)/pitchrange,(p-minpitch)/pitchrange], units="norm, midip in[58,70]", nvals=0, minval='null', maxval='null')
                        pm.addParam(out_path, "instID", [0.0,duration], [istep/interp_steps1norm,istep/interp_steps1norm], units="norm, interp steps in[0,10]", nvals=10, minval='null', maxval='null')
                        #pm.addParam(out_path, "envPt", [0.0,duration], [0,1.0], units=f"norm, duration in[0,{duration}]", nvals=0, minval='null', maxval='null')
                    
                        segments=11 # to include a full segment for each value including endpoints
                        envTimes, envVals=makesteps(np.linspace(0,duration,segments+1,True) , np.linspace(0,1,segments,True)) #need one extra time to flank each value
                        pm.addParam(out_path, "envPt", envTimes, envVals, units=f"norm, duration in[0,{duration}]", nvals=0, minval='null', maxval='null')

                        # write paramfile 
                        #torch.save(params, param_out_path)
                        #np.savetxt(txt_param_out_path, params.cpu().numpy())
                else:
                    print(f"saveAudioBatch: File {out_path} exists. Skipping...")
                    continue

    print("FINISHED!\n")
Code example #9
def train_inception_model(output_file,
                          att_cls="pitch",
                          dbsize=100000,
                          labels=["mallet"],
                          batch_size=50):
    # path_out = mkdir_in_path(".", "inception_data")
    # path_out = "/ldaphome/jnistal/sandbox"
    path_out = "/home/javier/developer/inception_test"
    path_to_raw = "/home/javier/developer/datasets/nsynth-train/audio"
    att_dict_path = "/home/javier/developer/datasets/nsynth-train/examples.json"
    # path_to_raw = "/ldaphome/jnistal/data/nsynth-train/audio/"
    data_manager = AudioPreprocessor(data_path=path_to_raw,
                                     output_path=path_out,
                                     dbname='nsynth',
                                     sample_rate=16000,
                                     audio_len=16000,
                                     data_type='audio',
                                     transform='specgrams',
                                     db_size=dbsize,
                                     labels=labels,
                                     transformConfig=dict(n_frames=64,
                                                          n_bins=128,
                                                          fade_out=True,
                                                          fft_size=1024,
                                                          win_size=1024,
                                                          hop_size=256,
                                                          n_mel=256),
                                     load_metadata=True,
                                     loaderConfig=dict(
                                         size=dbsize,
                                         instrument_labels=labels,
                                         pitch_range=[44, 70],
                                         filter_keys=['acoustic'],
                                         attribute_list=[att_cls],
                                         att_dict_path=att_dict_path))
    data_loader = data_manager.get_loader()

    val_data, val_labels = data_loader.train_val_split()
    val_data = val_data[:, 0:1]
    att_index = data_loader.getKeyOrders()[att_cls]['order']
    att_classes = data_loader.att_classes[att_index]
    num_classes = len(att_classes)

    data_loader = DataLoader(data_loader,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=2)

    device = "cuda" if GPU_is_available() else "cpu"
    sm = nn.Softmax(dim=1)

    inception_model = nn.DataParallel(
        SpectrogramInception3(num_classes, aux_logits=False))
    inception_model.to(device)

    optim = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    inception_model.parameters()),
                             betas=[0, 0.99],
                             lr=0.001)
    # optim = torch.optim.RMSprop(filter(lambda p: p.requires_grad, inception_model.parameters()),
    #                    alpha=1.0, lr=0.045, weight_decay=0.9)
    # criterion = nn.BCEWithLogitsLoss()

    criterion = nn.CrossEntropyLoss()
    epoch_bar = trange(5000, desc='train-loop')

    for i in epoch_bar:
        data_iter = iter(data_loader)
        iter_bar = trange(len(data_iter), desc='epoch-loop')
        inception_model.train()
        for j in iter_bar:
            data = next(data_iter)
            inputs_real = data[0]
            inputs_real.requires_grad = True
            target = data[1][:, att_index]

            # take magnitude cqt
            mag_input = F.interpolate(inputs_real[:, 0:1], (299, 299))
            # mag_input = inputs_real
            optim.zero_grad()

            output = inception_model(mag_input.to(device))
            loss = criterion(output, target.to(device))

            loss.backward()
            state_msg = f'Iter: {j}; loss: {loss:0.2f} '
            iter_bar.set_description(state_msg)
            optim.step()

        # SAVE CHECK-POINT
        if i % 10 == 0:
            if isinstance(inception_model, torch.nn.DataParallel):
                torch.save(inception_model.module.state_dict(), output_file)
            else:
                torch.save(inception_model.state_dict(), output_file)

        # EVALUATION
        with torch.no_grad():
            import numpy as np
            inception_model.eval()

            val_i = int(np.ceil(len(val_data) / batch_size))
            val_loss = 0
            prec = 0
            y_pred = []
            y_true = []
            for k in range(val_i):
                vlabels = val_labels[k * batch_size:batch_size *
                                     (k + 1)][:, att_index]

                val_output = inception_model(
                    F.interpolate(
                        val_data[k * batch_size:batch_size * (k + 1)],
                        (299, 299)))
                val_loss += criterion(val_output, vlabels.long()).item()

                val_p = sm(val_output).detach().to(device)
                val_out = list(map(lambda x: x.argmax(), val_p))
                y_pred += val_out
                y_true += list(vlabels)
                # val_str = midi2str([v.item() for v in val_out])
                # val_freq = midi2freq([v.item() for v in val_out])

                # confusion_matrix(val_output, )
                prec += (torch.stack(val_out)
                         == vlabels.long()).sum() * 100 / len(vlabels)
            cm = confusion_matrix([att_classes[i.int()] for i in y_pred],
                                  [att_classes[i.int()] for i in y_true],
                                  labels=att_classes)
            print(cm)
            print(
                classification_report(y_true,
                                      y_pred,
                                      labels=np.arange(num_classes),
                                      target_names=att_classes))
            state_msg2 = f'm_precision: {prec / val_i: 0.2f} %; epoch {i}; m_val_loss: {val_loss / val_i: 0.2f}'
            epoch_bar.set_description(state_msg2)
Code example #10
def generate(parser):
    args = parser.parse_args()

    argsObj=vars(args)
    print(f"generate args: {argsObj}")

    model, config, model_name = load_model_checkp(**vars(args))
    latentDim = model.config.categoryVectorDim_G

    # We load a dummy data loader for post-processing
    postprocess = AudioPreprocessor(**config['transformConfig']).get_postprocessor()
    #### I WANT TO ADD NORMALIZATION HERE  ######################
    print(f"postprocess: {postprocess}")

    # Create output evaluation dir
    output_dir = mkdir_in_path(args.dir, f"generation_tests")
    output_dir = mkdir_in_path(output_dir, model_name)
    output_dir = mkdir_in_path(output_dir, "2D")
    output_dir = mkdir_in_path(output_dir, datetime.now().strftime('%Y-%m-%d %H:%M'))


    # Create evaluation manager
    eval_manager = StyleGEvaluationManager(model, n_gen=2)

    z0=torch.load(argsObj["z0"])
    z1=torch.load(argsObj["z1"])
    if argsObj["z2"] == None :
        z2=0
    else :
        z2=torch.load(argsObj["z2"])
    if argsObj["z3"] == None : 
        z3 = 0
    else :
        z3=torch.load(argsObj["z3"])


    interp_steps0=int(argsObj["d0"])
    interp_steps0norm=interp_steps0 -1 # because the batch generator will spread the steps out to include both endpoints

    interp_steps1=int(argsObj["d1"])
    interp_steps1norm=interp_steps1 -1 # because the batch generator will spread the steps out to include both endpoints

    usePM=argsObj["pm"]
    print(f"interp_steps0 is {interp_steps0}, interp_steps1 is {interp_steps1}, and usePM (use ParamManager) is {usePM}")



    #######   ---- unconditioned 
    gen_batch, latents = eval_manager.unconditioned_linear_interpolation(line0z0=z0, line0z1=z1, line1z0=z2, line1z1=z3, d0steps=interp_steps0, d1steps=interp_steps1, d1nvar=argsObj["d1nvar"], d1var=argsObj["d1var"])

    g=list(gen_batch)
    #for k in length

    audio_out = map(postprocess, gen_batch)


        #save the .pt file no matter what since we may want to use them to zoom in, resample, or whatever.
    if not usePM :  
        saveAudioBatch(audio_out,
                   path=output_dir,
                   basename='test_2D4pt', 
                   sr=config["transformConfig"]["sample_rate"],
                   latents=latents)

    else :                      # save paramManager files, (and don't write latents separately)
        data=list(audio_out) #LW it was a map, make it a list
        zdata=zip(data,latents) #zip so we can enumerate through pairs of data/latents


        vstep=-1  # gets incremented in loop

        rowlength=interp_steps0*argsObj["d1nvar"]
        print(f'rowlength is {rowlength}')

        for k, (audio, params) in enumerate(zdata) :
            istep = int(k/rowlength)  #the outer counter, orthogonal to the two lines defining the submanifold

            j=k%rowlength
            jstep=int(j/argsObj["d1nvar"])
            vstep=(vstep+1)%argsObj["d1nvar"]

            print(f'doing row {istep}, col {jstep}, and variation {vstep}')

            if type(audio) != np.ndarray:
                audio = np.array(audio, float)

            path=output_dir
            basename='test_2D4pt' 
            sr=config["transformConfig"]["sample_rate"]

            #foo=f'{basename}_{jstep}_{vstep}.wav'

            out_path = os.path.join(path, f'{basename}_d1.{istep}_d0.{jstep}_v.{vstep}.wav')
            # paramManager, create 
            pm=paramManager.paramManager(out_path, output_dir)  ##-----------   paramManager  interface ------------------##
            #param_out_path = os.path.join(path, f'{basename}_{i}.params')
            pm.initParamFiles(overwrite=True)


            #also save pt files in case we want to use them to create other grids, scales, etc.
            pt_param_out_path = os.path.join(path, f'{basename}_d1.{istep}_d0.{jstep}_v.{vstep}.pt')
            torch.save(params, pt_param_out_path)

            if not os.path.exists(out_path):
                #write_wav(out_path, audio.astype(float), sr)
                sf.write(out_path, audio.astype(float), sr)

                duration=len(audio.astype(float))/float(sr)
                #print(f"duration is {duration}")
                if latents is not None:
                    #pm.addParam(out_path, "dim1", [0.0,duration], [(p-minpitch)/pitchrange,(p-minpitch)/pitchrange], units="norm, midip in[58,70]", nvals=0, minval='null', maxval='null')
                    pm.addParam(out_path, "dim0", [0.0,duration], [jstep/interp_steps0norm,jstep/interp_steps0norm], units=f'norm, interp steps in[0,{interp_steps0}]', nvals=interp_steps0, minval='null', maxval='null')
                    if interp_steps1norm > 0 : #else just doing 1D interpolation
                        pm.addParam(out_path, "dim1", [0.0,duration], [istep/interp_steps1norm,istep/interp_steps1norm], units=f'norm, interp steps in[0,{interp_steps1}]', nvals=interp_steps1, minval='null', maxval='null')
                
                    segments=11 # to include a full segment for each value including endpoints
                    envTimes, envVals=makesteps(np.linspace(0,duration,segments+1,True) , np.linspace(0,1,segments,True)) #need one extra time to flank each value
                    pm.addParam(out_path, "envPt", envTimes, envVals, units=f"norm, duration in[0,{duration}]", nvals=0, minval='null', maxval='null')

                    # write paramfile 
                    #torch.save(params, param_out_path)
                    #np.savetxt(txt_param_out_path, params.cpu().numpy())
            else:
                print(f"saveAudioBatch: File {out_path} exists. Skipping...")
                continue
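unconditioned_linear_interpolation above appears to sweep a 2-D grid spanned by the four loaded latents: d0 steps along each of the two "lines" (z0 -> z1 and z2 -> z3) and d1 steps between the lines, which is why the loop decodes k into istep (dim1) and jstep (dim0). A minimal sketch of that bilinear grid, ignoring the d1nvar/d1var variations (illustrative names, not the manager's code):

import torch

def bilinear_latent_grid(line0z0, line0z1, line1z0, line1z1, d0steps, d1steps):
    # Hypothetical sketch: interpolate along dim0 within each line, then along
    # dim1 between the two lines; returns (d1steps * d0steps, latent_dim).
    a = torch.linspace(0.0, 1.0, d0steps).view(1, d0steps, 1)   # dim0 fractions
    b = torch.linspace(0.0, 1.0, d1steps).view(d1steps, 1, 1)   # dim1 fractions
    line0 = (1 - a) * line0z0 + a * line0z1        # (1, d0steps, latent_dim)
    line1 = (1 - a) * line1z0 + a * line1z1
    grid = (1 - b) * line0 + b * line1             # (d1steps, d0steps, latent_dim)
    return grid.reshape(d1steps * d0steps, -1)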