def walk_directory(singer_name: str, mode: str='sing'):
    """
    Process every .wav recording in one NUS singer folder.

    For each recording the audio is loaded, a GE2E embedding is computed from
    the file, features are extracted with audio_process.process_audio and the
    phoneme annotations are read from the .txt file that shares the
    recording's base name. Each resulting segment is handed to
    write_data.write_data under the name "nus_<singer>_<file>_<segment>.hdf5".

    Arguments:
        singer_name: The name of the singer (sub-directory of the NUS raw dir).
        mode: either sing or read (sub-directory of the singer directory).
    """
    print("Processing data for NUS singer {}".format(singer_name))
    print("Processing the {} directory".format(mode))

    source_dir = os.path.join(config.raw_dirs['nus'], singer_name, mode)

    # Keep only visible .wav files.
    wav_names = []
    for entry in os.listdir(source_dir):
        if entry.startswith('.') or not entry.endswith('.wav'):
            continue
        wav_names.append(entry)
    total = len(wav_names)

    for index, wav_name in enumerate(wav_names):
        utils.progress(index, total, "folder processed")

        wav_path = os.path.join(source_dir, wav_name)
        stem = wav_name[:-4]  # file name without the ".wav" suffix

        audio, _ = audio_process.load_audio(wav_path)
        embedding = get_embedding_GE2E(wav_path)
        _, timestamps, feat, note, stft = audio_process.process_audio(audio)

        # The annotation file lives next to the recording.
        lab_path = os.path.join(source_dir, stem + ".txt")
        phonemes = midi_process.open_lab_file(lab_path)
        phos = np.array(midi_process.pho_segment_allign(phonemes, timestamps))

        for seg_idx, segment in enumerate(zip(feat, note, stft, phos)):
            featy, notey, stfty, phosy = utils.match_time(list(segment))
            singer_dict = {
                'embedding': embedding,
                'feats': featy,
                'notes': notey,
                'phons': phosy,
                'stfts': stfty,
            }
            out_name = "nus_{}_{}_{}.hdf5".format(singer_name, stem, seg_idx)
            write_data.write_data(singer_dict, out_name)
def walk_directory(wav_dir):
    """
    Process the vocal tracks of the DAMP intonation dataset.

    Every visible file ending in 'm4a' inside <wav_dir>/vocal_tracks is looked
    up in <wav_dir>/intonation.csv by its performance id (the file name up to
    the first '.'). Tracks whose account id is listed in config.damp_singers
    are loaded, run through audio_process.process_audio and each resulting
    segment is handed to write_data.write_data under the name
    "damp_<singer>_<song>_<segment>.hdf5".

    Arguments:
        wav_dir: Root directory of the dataset; must contain the
            'vocal_tracks' folder and 'intonation.csv'.
    """
    print("Processing data for the DAMP intonation dataset")
    full_dir = os.path.join(wav_dir, 'vocal_tracks')
    songs = [
        x for x in os.listdir(full_dir)
        if x.endswith('m4a') and not x.startswith('.')
    ]

    df = pd.read_csv(os.path.join(wav_dir, "intonation.csv"))

    for count, lf in enumerate(songs):
        song_name = lf.split('.')[0]

        # Look up the singer of THIS performance; the lookup must use the
        # current file, not the first entry of the listing.
        account_ids = df.query(
            'performance_id == "{}"'.format(song_name))[' account_id'].values
        if len(account_ids) == 0:
            # No metadata row for this track: skip it instead of aborting.
            print("No metadata found for file {}".format(lf))
            continue
        singer_name = account_ids[0].strip()

        if singer_name not in config.damp_singers:
            continue

        # Underscores separate the fields of the output file name, so they
        # are replaced inside the song name.
        song_name = song_name.replace('_', '-')
        utils.progress(count, len(songs), "folder processed")
        audio, _ = audio_process.load_audio(os.path.join(full_dir, lf))
        try:
            _, _, feat, note, stft = audio_process.process_audio(audio)

            for j, (fea, nots, stf) in enumerate(zip(feat, note, stft)):
                featy, notey, stfty = utils.match_time([fea, nots, stf])
                singer_dict = {
                    'feats': featy,
                    'notes': notey,
                    'stfts': stfty,
                }
                write_data.write_data(
                    singer_dict,
                    "damp_{}_{}_{}.hdf5".format(singer_name, song_name, j))
        except Exception as exc:
            # Best effort per file: report and move on, but let
            # KeyboardInterrupt / SystemExit propagate.
            print("Error in file {}: {!r}".format(song_name, exc))
def walk_directory(song_name: str):
    """
    Process the individual voice recordings of one CSD song.

    Every visible .wav file in <csd raw dir>/<song_name>/IndividualVoices,
    except those ending in '-24b.wav', is loaded and run through
    audio_process.process_audio. Each resulting segment is handed to
    write_data.write_data under the name
    "csd_<singer>_<song>_<segment>.hdf5".

    Arguments:
        song_name: The name of the song (sub-directory of the CSD raw dir).
    """
    print("Processing data for CSD song {}".format(song_name))

    wav_dir = config.raw_dirs['choralsingingdataset']
    full_dir = os.path.join(wav_dir, song_name, 'IndividualVoices')
    sing_wav_files = [
        x for x in os.listdir(full_dir) if x.endswith('.wav')
        and not x.startswith('.') and not x.endswith('-24b.wav')
    ]

    for count, lf in enumerate(sing_wav_files):
        # The singer id is built from the 2nd and 3rd '_'-separated fields
        # of the file name, with the extension stripped from the latter.
        name_parts = lf.split('_')
        singer_name = name_parts[1] + name_parts[2].replace('.wav', '')
        utils.progress(count, len(sing_wav_files), "folder processed")
        audio, _ = audio_process.load_audio(os.path.join(full_dir, lf))
        try:
            _, _, feat, note, stft = audio_process.process_audio(audio)

            for j, (fea, nots, stf) in enumerate(zip(feat, note, stft)):
                featy, notey, stfty = utils.match_time([fea, nots, stf])
                singer_dict = {
                    'feats': featy,
                    'notes': notey,
                    'stfts': stfty,
                }
                write_data.write_data(
                    singer_dict,
                    "csd_{}_{}_{}.hdf5".format(singer_name, song_name, j))
        except Exception as exc:
            # Best effort per file: report and move on, but let
            # KeyboardInterrupt / SystemExit propagate.
            print("Error in file {}: {!r}".format(lf, exc))
Example #4
0
    def train(self):
        """
        Function to train the model, and save Tensorboard summary, for N epochs.

        Training resumes from the epoch implied by the global step divided by
        config.autovc_batches_per_epoch_train and runs up to
        config.autovc_num_epochs. Each epoch makes one pass over the training
        generator and one over the validation generator, averaging the four
        losses returned for every batch. The averages are printed every
        config.print_every epochs, and the model is saved every
        config.save_every epochs as well as after the last epoch.
        """
        steps_done = self.sess.run(tf.train.get_global_step())
        start_epoch = int(steps_done / (config.autovc_batches_per_epoch_train))

        print("Start from: %d" % start_epoch)

        for epoch in range(start_epoch, config.autovc_num_epochs):

            data_generator = data_pipeline.data_gen_vc()
            val_generator = data_pipeline.data_gen_vc(mode = 'Val')

            epoch_final_loss = 0
            epoch_recon_loss = 0
            epoch_recon_0_loss = 0
            epoch_content_loss = 0

            val_final_loss = 0
            val_recon_loss = 0
            val_recon_0_loss = 0
            val_content_loss = 0

            batch_num = 0

            start_time = time.time()

            with tf.variable_scope('Training'):
                for feats_targs, targets_speakers in data_generator:

                    final_loss, recon_loss, recon_loss_0, content_loss, summary_str = self.train_model(
                        feats_targs, targets_speakers, self.sess)

                    epoch_final_loss += final_loss
                    epoch_recon_loss += recon_loss
                    epoch_recon_0_loss += recon_loss_0
                    epoch_content_loss += content_loss

                    self.train_summary_writer.add_summary(summary_str, epoch)
                    self.train_summary_writer.flush()

                    utils.progress(batch_num, config.autovc_batches_per_epoch_train, suffix = 'training done')

                    batch_num += 1

                # Guard the average: a generator that yields no batches must
                # not abort training with a ZeroDivisionError.
                train_batches = max(batch_num, 1)
                epoch_final_loss = epoch_final_loss / train_batches
                epoch_recon_loss = epoch_recon_loss / train_batches
                epoch_recon_0_loss = epoch_recon_0_loss / train_batches
                epoch_content_loss = epoch_content_loss / train_batches

                print_dict = {"Final Loss": epoch_final_loss}

                print_dict["Recon Loss"] = epoch_recon_loss
                print_dict["Recon Loss_0 "] = epoch_recon_0_loss
                print_dict["Content Loss"] = epoch_content_loss

            # Restart the batch counter for the validation pass.
            batch_num = 0
            with tf.variable_scope('Validation'):
                for feats_targs, targets_speakers in val_generator:

                    final_loss, recon_loss, recon_loss_0, content_loss, summary_str = self.validate_model(
                        feats_targs, targets_speakers, self.sess)

                    val_final_loss += final_loss
                    val_recon_loss += recon_loss
                    val_recon_0_loss += recon_loss_0
                    val_content_loss += content_loss

                    self.val_summary_writer.add_summary(summary_str, epoch)
                    self.val_summary_writer.flush()

                    utils.progress(batch_num, config.autovc_batches_per_epoch_val, suffix = 'validation done')

                    batch_num += 1

                # Same guard as above, e.g. for an empty validation split.
                val_batches = max(batch_num, 1)
                val_final_loss = val_final_loss / val_batches
                val_recon_loss = val_recon_loss / val_batches
                val_recon_0_loss = val_recon_0_loss / val_batches
                val_content_loss = val_content_loss / val_batches

                print_dict["Val Final Loss"] = val_final_loss

                print_dict["Val Recon Loss"] = val_recon_loss
                print_dict["Val Recon Loss_0 "] = val_recon_0_loss
                print_dict["Val Content Loss"] = val_content_loss

            end_time = time.time()
            if (epoch + 1) % config.print_every == 0:
                self.print_summary(print_dict, epoch, end_time-start_time)
            if (epoch + 1) % config.save_every == 0 or (epoch + 1) == config.autovc_num_epochs:
                self.save_model(self.sess, epoch+1, config.autovc_emb_log_dir)
def get_stats():
    """
    Get the maximum and minimum feat values for the datasets to use.

    Scans every .hdf5 file in config.feats_dir whose name prefix (the part
    before the first '_', upper-cased) is listed in config.datasets. Files
    whose "feats" array has no more than config.max_phr_len rows are skipped
    and reported at the end. For the remaining files, zeros in the
    second-to-last feature column are replaced by the median of that column's
    positive values before the per-column extrema are taken.

    The 66 running maxima and minima are written to config.stat_file as the
    datasets "feats_maximus" and "feats_minimus", and the "stat_prep"/"prep"
    configuration variable is set to "True".
    """
    voc_list = [
        x for x in os.listdir(config.feats_dir)
        if x.endswith('.hdf5') and x.split('_')[0].upper() in config.datasets
    ]

    # NOTE(review): the running bounds start at 0 and 1000 rather than
    # -inf/+inf, so a column that is always negative keeps a maximum of 0.
    # Kept as-is so previously computed statistics do not change.
    max_feat = np.zeros(66)
    min_feat = np.ones(66) * 1000

    too_small = []

    for index, voc_to_open in enumerate(voc_list):

        with h5py.File(os.path.join(config.feats_dir, voc_to_open),
                       "r") as voc_file:
            feats = voc_file["feats"][()]

        if len(feats) <= config.max_phr_len:
            # Too short to be used: remember it and carry on instead of
            # dropping into a debugger in the middle of a batch run.
            too_small.append(voc_to_open)
        else:
            # Column -2 is filled where it is zero with the median of its
            # positive entries, so the zeros do not drag the minimum down.
            f0 = feats[:, -2]
            med = np.median(f0[f0 > 0])
            f0[f0 == 0] = med
            feats[:, -2] = f0

            file_max = np.array(feats).max(axis=0)
            file_min = np.array(feats).min(axis=0)

            # fmax/fmin ignore NaN entries, matching the original
            # element-wise ">" / "<" comparisons (always False for NaN).
            n_max = len(file_max)
            max_feat[:n_max] = np.fmax(max_feat[:n_max], file_max)
            n_min = len(file_min)
            min_feat[:n_min] = np.fmin(min_feat[:n_min], file_min)

        utils.progress(index + 1, len(voc_list), "Processed")

    if too_small:
        print("Skipped {} file(s) with at most {} frames: {}".format(
            len(too_small), config.max_phr_len, ", ".join(too_small)))

    with h5py.File(config.stat_file, mode='w') as hdf5_file:

        hdf5_file.create_dataset("feats_maximus", [66], np.float32)
        hdf5_file.create_dataset("feats_minimus", [66], np.float32)
        hdf5_file["feats_maximus"][:] = max_feat
        hdf5_file["feats_minimus"][:] = min_feat
    config.change_variable("stat_prep", "prep", "True")