def __init__(self,
                 roots,
                 sample_rate,
                 max_time,
                 target_level=-25,
                 noise_proportion=0,
                 noise_type='gaussian',
                 snrs=[3],
                 eps=1e-8,
                 **kwargs):
        self.sample_rate = sample_rate
        self.max_time = max_time
        self.target_level = target_level
        self.eps = eps

        self.filepths = []
        for root in roots:
            self.filepths += find_files(root)
        assert len(self.filepths) > 0, 'No audio file detected'

        self.noise_proportion = noise_proportion
        self.snrs = snrs
        if noise_type == 'gaussian':
            self.noise_sampler = torch.distributions.Normal(0, 1)
        else:
            self.noise_wavpths = find_files(noise_type)
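The fields stored above (noise_sampler, snrs, eps) suggest on-the-fly noise augmentation. Below is a minimal sketch of how they could be used on a torch waveform tensor; the helper name add_gaussian_noise_at_snr is introduced here purely for illustration and is not part of the original class.

import torch


def add_gaussian_noise_at_snr(wav, snr_db, eps=1e-8):
    """Mix zero-mean Gaussian noise into `wav` so the result has roughly `snr_db` dB SNR."""
    noise = torch.randn_like(wav)
    signal_power = wav.pow(2).mean()
    noise_power = noise.pow(2).mean() + eps
    # scale noise so that 10 * log10(signal_power / (scale**2 * noise_power)) == snr_db
    scale = torch.sqrt(signal_power / (noise_power * 10 ** (snr_db / 10)))
    return wav + scale * noise

# hypothetical usage inside the dataset, e.g. with snr = random.choice(self.snrs):
# noisy_wav = add_gaussian_noise_at_snr(clean_wav, snr, eps=self.eps)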
Example #2
def SaveSTFT_Arg(pitch_shift, time_stretch, argtime):
    targetlist = find_files(C.target_path, ext="wav")
    noiselist = find_files(C.noise_path, ext="wav")
    noise_num = len(noiselist)
    target_index = 0
    skip_count = 0
    for targetfile in tqdm(targetlist):

        target_mag, _ = LoadAudio_Arg(targetfile)
        norm = target_mag.max()
        # patches are cut along the time axis (axis 1), so check that axis
        if target_mag.shape[1] > C.PATCH_LENGTH:

            step = target_mag.shape[1] // C.PATCH_LENGTH

            for i in tqdm(range(step), leave=False):
                target_mag_p = target_mag[:, i * C.PATCH_LENGTH:(i + 1) *
                                          C.PATCH_LENGTH]
                target_mag_p /= norm

                noise_file = noiselist[random.randint(0, noise_num - 1)]
                noise_mag, _ = LoadAudio(noise_file)
                noise_mag = LengthAdjuster(noise_mag)
                noise_mag /= norm

                addnoise_mag = target_mag_p + noise_mag
                addnoise_mag /= norm
                fname = str(target_index) + "_" + str(i) + str(argtime)
                np.savez(os.path.join(C.PATH_FFT, fname + "_arg.npz"),
                         speech=target_mag_p,
                         addnoise=addnoise_mag)
        else:
            skip_count += 1

        target_index += 1

    print("SKIP:", skip_count)
Example #3
def get_ann_audio(directory):
    '''Get a list of annotations and audio files from a directory.

    This also validates that the lengths match and are paired properly.

    Parameters
    ----------
    directory : str
        The directory to search

    Returns
    -------
    pairs : list of tuples (audio_file, annotation_file)
    '''

    audio = find_files(directory)
    annos = find_files(directory, ext=['jams', 'jamz'])

    paired = list(zip(audio, annos))

    if len(audio) != len(annos) or any(
        [base(aud) != base(ann) for aud, ann in paired]):
        raise DataError(
            'Unmatched audio/annotation data in {}'.format(directory))

    return paired
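A short hedged usage example for get_ann_audio; the directory path below is an assumption, not taken from the original project.

# hypothetical call; assumes a directory holding paired audio and .jams/.jamz files
pairs = get_ann_audio('datasets/my_corpus')
for audio_file, annotation_file in pairs:
    print(audio_file, '->', annotation_file)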
Example #4
def sample_wavs_and_dump_txt(root, dev_ids, numbers, meta_data_name):
    
    wav_list = []
    count_positive = 0
    for _ in range(numbers):
        prob = random.random()
        if (prob > 0.5):
            dev_id_pair = random.sample(dev_ids, 2)

            # sample 2 wavs from different speakers
            sample1 = "/".join(random.choice(find_files(os.path.join(root,dev_id_pair[0]))).split("/")[-3:])
            sample2 = "/".join(random.choice(find_files(os.path.join(root,dev_id_pair[1]))).split("/")[-3:])

            label = "0"

            wav_list.append(" ".join([label, sample1, sample2]))
            
        else:
            dev_id_pair = random.sample(dev_ids, 1)
            
            # sample 2 wavs from the same speaker
            sample1 = "/".join(random.choice(find_files(os.path.join(root,dev_id_pair[0]))).split("/")[-3:])
            sample2 = "/".join(random.choice(find_files(os.path.join(root,dev_id_pair[0]))).split("/")[-3:])

            label = "1"
            count_positive +=1

            wav_list.append(" ".join([label, sample1, sample2]))
    
    with open(meta_data_name, "w") as f:
        for data in wav_list:
            f.write(data + "\n")

    return wav_list
Example #5
def makeCQTexOvertone():
    TORIDASUBASHO = PATH_pwCQT
    HOZONBASYO = PATH_pwov2CQT  # directory to store the n-th overtone
    OVTONE = 2

    audiolist = find_files(PATH_AUDIO, ext="wav")
    cqtlist = find_files(TORIDASUBASHO, ext="npy")
    itemlist = len(audiolist)
    i_counter = 1
    for audiofile, cqtfile in zip(audiolist, cqtlist):
        print("{}/{}".format(i_counter, itemlist))
        filename = audiofile.split('/')[-1]
        albname = audiofile.split('/')[-2]
        # foldname = audiofile.split('/')[-3]
        cqt_filename = cqtfile.split("/")[-1]
        if (filename.split(".")[0] != cqt_filename.split(".")[0]):
            print("file_not_match", filename, cqt_filename)


# ディレクトリチェック

        if not (os.path.exists(HOZONBASYO + '/' + albname)):
            os.makedirs(HOZONBASYO + '/' + albname)
        if not (os.path.exists(HOZONBASYO + '/' + albname + '/' + filename +
                               '.npy')):
            wav, sr = load(audiofile, sr=SR)
            cqt = np.load(cqtfile)
            excqt = exOvertone(cqt, wav, overtone=OVTONE)
            np.save(HOZONBASYO + '/' + albname + '/' + filename + '.npy',
                    np.array(excqt, dtype="float32"))
        i_counter += 1
def TrainConvnetExtractorDeepChroma(trainidx,
                                    epoch=20,
                                    saveas="convnet.model"):
    cqtfilelist = np.array(find_files(const.PATH_HCQT, ext="npy"))[trainidx]
    labfilelist = np.array(
        find_files(const.PATH_CHORDLAB, ext=["lab", "chords"]))[trainidx]
    #midifilelist = find_files(const.PATH_MIDI,ext="mid")[:filecnt]
    config.train = True
    config.enable_backprop = True
    convnet = networks.ConvnetFeatExtractor()
    model = networks.ConvnetPredictor(convnet)
    model.to_gpu(0)
    opt = optimizers.MomentumSGD()
    opt.setup(model)
    print("DeepChroma Convnet Training...")
    print("start epochs...")
    S = []
    T = []

    for cqtfile, labfile in zip(cqtfilelist, labfilelist):
        cqt = np.load(cqtfile)
        spec = utils.PreprocessSpec(cqt[:const.CQT_H, :, :])
        targ = voc.LoadChromaTarget(labfile)
        minlen = min([cqt.shape[1], targ.shape[0]])
        S.append(spec[:, :minlen, :])
        T.append(targ[:minlen, :])
    S = np.concatenate(S, axis=1)
    T = np.concatenate(T, axis=0)
    assert (S.shape[1] == T.shape[0])

    for ep in range(epoch):
        sum_loss = 0

        randidx = np.random.randint(0,
                                    S.shape[1] - const.CONV_TRAIN_SEQLEN - 1,
                                    S.shape[1] // const.CONV_TRAIN_SEQLEN * 4)
        for i in range(0, randidx.size - const.CONV_TRAIN_BATCH,
                       const.CONV_TRAIN_BATCH):
            x_batch = np.stack([
                S[:, randidx[j]:randidx[j] + const.CONV_TRAIN_SEQLEN, :]
                for j in range(i, i + const.CONV_TRAIN_BATCH)
            ])
            t_batch = np.stack([
                T[randidx[j]:randidx[j] + const.CONV_TRAIN_SEQLEN, :]
                for j in range(i, i + const.CONV_TRAIN_BATCH)
            ])
            x_in = cp.asarray(x_batch)
            t_in = cp.asarray(t_batch)
            model.cleargrads()
            loss = model(x_in, t_in)
            loss.backward()
            opt.update()
            sum_loss += loss.data

        convnet.save(saveas)
        print("epoch: %d/%d  loss:%.04f" %
              (ep + 1, epoch, sum_loss / const.CONV_TRAIN_BATCH))

    convnet.save(saveas)
def EvaluateChord(idx, verbose=True, sonify=False, cross=False):
    lablist = np.array(find_files(const.PATH_CHORDLAB, ext=["lab",
                                                            "chords"]))[idx]
    est_lablist = np.array(find_files(
        const.PATH_ESTIMATE_CROSS, ext="lab"))[idx] if cross else find_files(
            const.PATH_ESTIMATE, ext="lab")
    scorelist_majmin = np.array([])
    scorelist_sevenths = np.array([])
    scorelist_majmininv = np.array([])
    scorelist_seventhinv = np.array([])
    durations = np.array([])
    confmatrix = np.zeros((const.N_CHORDS, const.N_CHORDS))
    song_durations = np.array([])
    for labfile, estfile in zip(lablist, est_lablist):
        (ref_intervals,
         ref_labels) = mir_eval.io.load_labeled_intervals(labfile)
        (est_intervals,
         est_labels) = mir_eval.io.load_labeled_intervals(estfile)
        est_intervals, est_labels = mir_eval.util.adjust_intervals(
            est_intervals, est_labels, ref_intervals.min(),
            ref_intervals.max(), mir_eval.chord.NO_CHORD,
            mir_eval.chord.NO_CHORD)
        (intervals, ref_labels,
         est_labels) = mir_eval.util.merge_labeled_intervals(
             ref_intervals, ref_labels, est_intervals, est_labels)
        durations = mir_eval.util.intervals_to_durations(intervals)

        comparisons_sevenths = mir_eval.chord.sevenths(ref_labels, est_labels)
        comparisons_majmininv = mir_eval.chord.majmin_inv(
            ref_labels, est_labels)
        comparisons_seventhinv = mir_eval.chord.sevenths_inv(
            ref_labels, est_labels)
        comparisons_majmin = mir_eval.chord.majmin(ref_labels, est_labels)

        score_majmin = mir_eval.chord.weighted_accuracy(
            comparisons_majmin, durations)
        scorelist_majmin = np.append(scorelist_majmin, score_majmin)
        score_sevenths = mir_eval.chord.weighted_accuracy(
            comparisons_sevenths, durations)
        scorelist_sevenths = np.append(scorelist_sevenths, score_sevenths)
        score_majmininv = mir_eval.chord.weighted_accuracy(
            comparisons_majmininv, durations)
        scorelist_majmininv = np.append(scorelist_majmininv, score_majmininv)
        score_seventhinv = mir_eval.chord.weighted_accuracy(
            comparisons_seventhinv, durations)
        scorelist_seventhinv = np.append(scorelist_seventhinv,
                                         score_seventhinv)
        if verbose:
            print("%s --- %.3f" % (labfile.split('/')[-1], score_majmin))

        for i in range(len(ref_labels)):
            confmatrix[voc.GetChordIDSign(ref_labels[i]),
                       voc.GetChordIDSign(est_labels[i])] += durations[i]
        song_durations = np.append(song_durations, np.sum(durations))
    return scorelist_majmin, scorelist_sevenths, scorelist_majmininv, scorelist_seventhinv, confmatrix, song_durations
Example #8
    def __init__(self, idx, rand_shift=False):
        self.list_labfile = np.array(find_files(C.PATH_CHORDLAB, ext=["lab", "chords"]))[idx]
        #self.list_cqtfile = np.array(find_files(C.PATH_CQT, ext="npy"))[idx]
        self.list_featfile = np.array(find_files(C.PATH_FEAT, ext="npy"))[idx]
        #self.list_mfccfile = np.array(find_files(C.PATH_MFCC, ext="npy"))[idx]

        #self.list_cqt = [U.normalize_spec(np.load(f)) for f in self.list_cqtfile]
        self.list_feat = [np.load(f)[:, :C.N_DIMS_FEAT] for f in self.list_featfile]
        self.labs_list, self.lab_intervals_list = LoadLabelSet(self.list_labfile, C.label_shifts[idx])
        #self.list_mfcc = [np.load(f).T for f in self.list_mfccfile]

        self.rand_shift = rand_shift
Example #9
    def __init__(self, root):
        seed = random.randint(1, 1000)
        random.seed(seed)
        wav_root = Path(root) / "wav48"

        wav_files = []
        metadata = defaultdict(list)
        speaker_dirs = [
            speaker_dir for speaker_dir in wav_root.iterdir()
            if speaker_dir.is_dir()
        ]

        for speaker_dir in speaker_dirs:
            if speaker_dir.stem in self._except_folder:
                continue
            for wav_file in find_files(speaker_dir):
                wav_file = str(PurePosixPath(wav_file).relative_to(root))
                wav_files.append(wav_file)
                speaker_id = self.get_speaker(wav_file)
                metadata[speaker_id].append(wav_file)

        self.root = root
        self.seed = seed
        self.wav_files = wav_files
        self.metadata = metadata
def CCMixter():
    '''
    mix : original wav file
    source_1 : inst wav file 
    source_2 : vocal wav file
    '''
    Audiolist = os.listdir('./data')

    spec_dir = "./Spectrogram"
    if os.path.exists(spec_dir) is False:
        os.mkdir(spec_dir)

    for audio in Audiolist:
        try:
            audio_path = os.path.join('./data/' + audio)
            print("Song : %s" % audio)
            if os.path.exists(os.path.join(spec_dir, audio + '.npz')):
                print("Already exist!! Skip....")
                continue
            aud = find_files(audio_path, ext="wav")

            mix, _ = load(aud[0], sr=None)
            inst, _ = load(aud[1], sr=None)
            vocal, _ = load(aud[2], sr=None)
            print("Saving...")

            SaveSpectrogram(mix, inst, vocal, audio)
        except IndexError:
            print("Wrong Directory")
def EstimateChord(idx, dnnmodel, todir=False):
    #dnn = networks.FeatureDNN()
    #dnn = networks.ConvnetFeatExtractor()
    dnn = networks.FullCNNFeatExtractor()
    #dnn = networks.NoOperation()
    dnn.load(dnnmodel)
    dnn.to_gpu(0)
    decoder = networks.NBLSTMCRF()
    decoder.load()
    decoder.to_gpu(0)
    cqtfilelist = np.array(find_files(const.PATH_HCQT, ext="npy"))[idx]
    i = 0
    chainer.config.train = False
    chainer.config.enable_backprop = False
    for cqtfile in cqtfilelist:
        cqt = utils.Embed(utils.PreprocessSpec(np.load(cqtfile)[:, :, :]), 1)
        chroma = dnn.GetFeature(cp.asarray(cqt)).data
        path = decoder.argmax(chroma)
        feat = cp.asnumpy(chroma)
        if todir:
            fname = cqtfile.split("/")[-1] + ".lab"
            alb = cqtfile.split("/")[-2]
            utils.SaveEstimatedLabelsFramewise(
                path, const.PATH_ESTIMATE_CROSS + alb + "/" + fname, feat)
        else:
            utils.SaveEstimatedLabelsFramewise(
                path, const.PATH_ESTIMATE + "%03d.lab" % i, feat)
        i += 1
Example #12
def visualize(data_dirs, wav2mel_path, checkpoint_path, output_path):
    """Visualize high-dimensional embeddings using t-SNE."""

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    wav2mel = torch.jit.load(wav2mel_path)
    dvector = torch.jit.load(checkpoint_path).eval().to(device)

    print("[INFO] model loaded.")

    n_spkrs = 0
    paths, spkr_names, mels = [], [], []

    for data_dir in data_dirs:
        data_dir_path = Path(data_dir)
        for spkr_dir in [x for x in data_dir_path.iterdir() if x.is_dir()]:
            n_spkrs += 1
            audio_paths = find_files(spkr_dir)
            spkr_name = spkr_dir.name
            for audio_path in audio_paths:
                paths.append(audio_path)
                spkr_names.append(spkr_name)

    for audio_path in tqdm(paths, ncols=0, desc="Preprocess"):
        wav_tensor, sample_rate = torchaudio.load(audio_path)
        with torch.no_grad():
            mel_tensor = wav2mel(wav_tensor, sample_rate)
        mels.append(mel_tensor)

    embs = []

    for mel in tqdm(mels, ncols=0, desc="Embed"):
        with torch.no_grad():
            emb = dvector.embed_utterance(mel.to(device))
            emb = emb.detach().cpu().numpy()
        embs.append(emb)

    tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
    transformed = tsne.fit_transform(embs)

    print("[INFO] embeddings transformed.")

    data = {
        "dim-1": transformed[:, 0],
        "dim-2": transformed[:, 1],
        "label": spkr_names,
    }

    plt.figure()
    sns.scatterplot(
        x="dim-1",
        y="dim-2",
        hue="label",
        palette=sns.color_palette(n_colors=n_spkrs),
        data=data,
        legend="full",
    )
    plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    plt.tight_layout()
    plt.savefig(output_path)
Example #13
def ConfMatrix_Allclass(idx):
    lab_list = np.array(find_files(C.PATH_CHORDLAB, ext=["lab",
                                                         "chords"]))[idx]
    estimated_lab_list = [
        os.path.join(C.PATH_ESTIMATE_CROSS,
                     p.split("/")[-2],
                     p.split("/")[-1]) for p in lab_list
    ]
    durations = np.array([])
    confmatrix = np.zeros((C.N_VOCABULARY_TRIADS, C.N_VOCABULARY_TRIADS))

    for labfile, estfile in zip(lab_list, estimated_lab_list):
        (ref_intervals,
         ref_labels) = mir_eval.io.load_labeled_intervals(labfile)
        (est_intervals,
         est_labels) = mir_eval.io.load_labeled_intervals(estfile)
        est_intervals, est_labels = mir_eval.util.adjust_intervals(
            est_intervals, est_labels, ref_intervals.min(),
            ref_intervals.max(), mir_eval.chord.NO_CHORD,
            mir_eval.chord.NO_CHORD)
        (intervals, ref_labels,
         est_labels) = mir_eval.util.merge_labeled_intervals(
             ref_intervals, ref_labels, est_intervals, est_labels)
        durations = mir_eval.util.intervals_to_durations(intervals)
        ref_labels_id = encode_chordseq_hierarchical(ref_labels)
        est_labels_id = encode_chordseq_hierarchical(est_labels)
        for i in range(len(ref_labels)):
            confmatrix[ref_labels_id[i, 0], est_labels_id[i,
                                                          0]] += durations[i]

    confmatrix /= np.sum(confmatrix, axis=1, keepdims=True)
    return confmatrix
Example #14
def makeCQTData():
    SAVEDIR = PATH_pwCQT

    audiolist = find_files(PATH_AUDIO, ext="wav")
    itemlist = len(audiolist)
    for i, audiofile in enumerate(audiolist):
        print("{}/{}".format(i + 1, itemlist))
        wav, sr = load(audiofile, sr=SR)
        filename = audiofile.split('/')[-1]
        albname = audiofile.split('/')[-2]
        # foldname = audiofile.split('/')[-3]
        # directory check
        if not (os.path.exists(SAVEDIR + '/' + albname)):
            os.makedirs(SAVEDIR + '/' + albname)
        if not (os.path.exists(SAVEDIR + '/' + albname + '/' + filename +
                               '.npy')):
            cqt_spec, freqs = cqt(wav, sr, fmin="C1")

            # cqt_power = np.abs(cqt_spec)  # go with the sqrt mode # 2019.12.22
            cqt_power = np.array(
                [np.sqrt(c.real**2 + c.imag**2) for c in cqt_spec],
                dtype="float32")
            cqt_power = np.power(cqt_power, 2)  # power step 1
            cqt_power *= 2  # power step 2

            cqt_power = cqt_power.reshape(1, cqt_power.shape[0],
                                          cqt_power.shape[1])
            np.save(SAVEDIR + '/' + albname + '/' + filename + '.npy',
                    np.array(cqt_power, dtype="float32"))
Example #15
def load_npz(target=None, first=None):
    npz_files = find_files('../DSD100_Npz/Dev', ext="npz")[:first]
    # npz_files = find_files('../numpy', ext="npz")[:first]
    for file in npz_files:
        npz = np.load(file)
        assert (npz["mix"].shape == npz[target].shape)
        yield npz['mix'], npz[target]
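A hedged usage sketch for the load_npz generator above; the target key name "vocals" is an assumption about the .npz contents.

# hypothetical usage; the target key name is an assumption
for mix_mag, target_mag in load_npz(target="vocals", first=10):
    print(mix_mag.shape, target_mag.shape)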
Example #16
    def __init__(self, file_path, meta_data, max_timestep=None):

        self.roots = file_path
        self.root_key = list(self.roots.keys())
        self.max_timestep = max_timestep

        # extract dev speakers and store them in self.black_list_speakers
        with open(meta_data, "r") as f:
            self.black_list_speakers = f.read().splitlines()

        # collect all speakers; black-listed (dev) speakers can be filtered out later
        self.all_speakers = \
            [f.path for key in self.root_key for f in os.scandir(self.roots[key]) if f.is_dir()]
        self.speaker_num = len(self.all_speakers)
        self.necessary_dict = self.processing()
        self.label_mapping_spk_id = {}
        # map speaker id to speaker index
        self.build_label_mapping()

        print("search all wavs paths")
        start = time.time()
        self.dataset = []
        for speaker in tqdm.tqdm(self.all_speakers):
            wav_list = find_files(speaker)
            self.dataset.extend(wav_list)
        end = time.time()
        print(f"search all wavs paths costs {end-start} seconds")

        self.label = self.build_label(self.dataset)
Example #17
    def __init__(self, root):
        seed = random.randint(1, 1000)
        random.seed(seed)

        if (Path(root) / "by_book").exists():
            _root = Path(root) / "by_book"
        else:
            _root = Path(root)

        wav_files = []
        metadata = defaultdict(list)
        speaker_dirs = [
            speaker_dir for speaker_dir in (_root / "female").iterdir()
            if speaker_dir.is_dir()
        ]
        speaker_dirs += [
            speaker_dir for speaker_dir in (_root / "male").iterdir()
            if speaker_dir.is_dir()
        ]

        for speaker_dir in speaker_dirs:
            for wav_file in find_files(speaker_dir):
                wav_file = str(PurePosixPath(wav_file).relative_to(root))
                wav_files.append(wav_file)
                speaker_id = self.get_speaker(wav_file)
                metadata[speaker_id].append(wav_file)

        self.root = root
        self.seed = seed
        self.wav_files = wav_files
        self.metadata = metadata
Example #18
    def __init__(self, vad_config, key_list, file_path, meta_data, max_timestep=None):
    
        self.roots = file_path
        self.root_key = key_list
        self.max_timestep = max_timestep
        self.vad_c = vad_config 
        self.dataset = []
        self.all_speakers = []

        for index in range(len(self.root_key)):
            cache_path = Path(os.path.dirname(__file__)) / 'cache_wav_paths' / f'cache_{self.root_key[index]}.p'
            p = Path(self.roots[index])

            # load cache_path if the file exists
            if os.path.isfile(cache_path):
                # cache dict: 
                # {
                #   "speaker_id1": ["wav_a_path1", "wav_a_path2", ...],
                #   "speaker_id2": ["wav_b_path1", "wav_b_path2", ...],
                #   ...,
                # }
                cache_wavs_dict = pickle.load(open(cache_path,"rb"))
                self.all_speakers.extend(list(cache_wavs_dict.keys()))
                for speaker_id in list(cache_wavs_dict.keys()):
                    for wavs in cache_wavs_dict[speaker_id]:
                        utterance_id = "/".join(str(p/speaker_id/wavs).split("/")[-3:]).replace(".wav","").replace("/","-")                        
                        self.dataset.append([str(p / speaker_id / wavs), utterance_id])

            else:
                speaker_wav_dict = {}
                speaker_dirs = [f.path.split("/")[-1] for f in os.scandir(self.roots[index]) if f.is_dir()]
                self.all_speakers.extend(speaker_dirs)

                print("search all wavs paths")
                start = time.time()
                for speaker in tqdm.tqdm(speaker_dirs):
                    speaker_dir =  p / speaker
                    wav_list=find_files(speaker_dir)
                    speaker_wav_dict[speaker] = []
                    for wav in wav_list:
                        wav_sample, _ = apply_effects_file(str(speaker_dir/wav), EFFECTS)
                        wav_sample = wav_sample.squeeze(0)
                        length = wav_sample.shape[0]

                        if length > self.vad_c['min_sec']:
                            utterance_id = "/".join(str(speaker_dir/wav).split("/")[-3:]).replace(".wav","").replace("/","-") 
                            self.dataset.append([str(speaker_dir/wav), utterance_id])
                            speaker_wav_dict[speaker].append("/".join(wav.split("/")[-2:]))
                end = time.time()

                print(f"search all wavs paths costs {end-start} seconds")
                print(f"save wav paths to {cache_path}! so we can directly load all_path in next time!")
                pickle.dump(speaker_wav_dict, open(cache_path,"wb"))    

        self.speaker_num = len(self.all_speakers)
        self.necessary_dict = self.processing()
        self.label_mapping_spk_id = {}
        # map speaker id to speaker index
        self.build_label_mapping()
        self.label=self.build_label(self.dataset)
Example #19
def main(data_dirs, out_dir, n_workers, audio_processor_path):
    """Preprocess audio files into features for training."""

    audio_paths = chain.from_iterable([find_files(data_dir) for data_dir in data_dirs])

    audio_processor_path = Path(audio_processor_path) / "audioprocessor"
    audio_processor_path = str(audio_processor_path).replace("/", ".")
    audioprocessor = getattr(
        importlib.import_module(audio_processor_path), "AudioProcessor"
    )

    save_dir = Path(out_dir)
    save_dir.mkdir(parents=True, exist_ok=True)

    executor = ProcessPoolExecutor(max_workers=n_workers)

    futures = []
    for audio_path in audio_paths:
        futures.append(
            executor.submit(load_process_save, audioprocessor, audio_path, save_dir)
        )

    infos = {
        "sample_rate": audioprocessor.sample_rate,
        "hop_len": audioprocessor.hop_len,
        "n_mels": audioprocessor.n_mels,
        "utterances": [future.result() for future in tqdm(futures, ncols=0)],
    }

    with open(save_dir / "metadata.json", "w") as f:
        json.dump(infos, f, indent=2)
Example #20
def SearchErrorExample(idx, ref, est):
    lab_list = np.array(find_files(C.PATH_CHORDLAB, ext=["lab",
                                                         "chords"]))[idx]
    estimated_lab_list = [
        os.path.join(C.PATH_ESTIMATE_CROSS,
                     p.split("/")[-2],
                     p.split("/")[-1]) for p in lab_list
    ]
    list_examples = []
    for labfile, estfile in zip(lab_list, estimated_lab_list):
        (ref_intervals,
         ref_labels) = mir_eval.io.load_labeled_intervals(labfile)
        (est_intervals,
         est_labels) = mir_eval.io.load_labeled_intervals(estfile)
        est_intervals, est_labels = mir_eval.util.adjust_intervals(
            est_intervals, est_labels, ref_intervals.min(),
            ref_intervals.max(), mir_eval.chord.NO_CHORD,
            mir_eval.chord.NO_CHORD)
        (intervals, ref_labels,
         est_labels) = mir_eval.util.merge_labeled_intervals(
             ref_intervals, ref_labels, est_intervals, est_labels)
        durations = mir_eval.util.intervals_to_durations(intervals)
        ref_labels_id = encode_chordseq_hierarchical(ref_labels)
        est_labels_id = encode_chordseq_hierarchical(est_labels)
        for i in range(len(ref_labels)):
            if durations[i] > 0.5 and ref_labels_id[
                    i, 0] == ref and est_labels_id[i, 0] == est:
                list_examples.append((labfile, ref, est, intervals[i]))

    return list_examples
Example #21
def LoadSpectrogram_batch(batch_size=10):
    filelist = find_files('./Spectrogram', ext="npz")
    batch_list = []
    for i in range(0, len(filelist), batch_size):
        #print(i)
        file_list = filelist[i : i + batch_size]
        batch_list.append(file_list)
    return batch_list
def separate(PATH_INPUT, PATH_OUTPUT, MODEL, SR=16000, FFT_SIZE=1024, H=512):

    if os.path.isdir(PATH_INPUT):
        # if the input is a directory, build the file list
        filelist_mixdown = find_files(PATH_INPUT, ext="wav", case_sensitive=True)
    else:
        # if the input is a single file
        filelist_mixdown = [PATH_INPUT]
    print ('number of mixdown file', len(filelist_mixdown))
    
    # create the output directory if it does not exist
    _, path_output_ext = os.path.splitext(PATH_OUTPUT)
    print ('path_output_ext',path_output_ext)
    if len(path_output_ext)==0  and  not os.path.exists(PATH_OUTPUT):
        os.mkdir(PATH_OUTPUT)
    
    # load the model
    unet = train.UNet()
    chainer.serializers.load_npz( MODEL,unet)
    config.train = False
    config.enable_backprop = False
    
    # load each mixture and try to separate the vocal (speech)
    for fmixdown in filelist_mixdown:
        # if audioread raises an error, fall back to scipy
        try:
            y_mixdown, _ = load(fmixdown,  sr=SR, mono=True)
        except Exception:
            sr_mixdown, y_mixdown = read(fmixdown)
            if not sr_mixdown == SR:
                y_mixdown = resample(y_mixdown, sr_mixdown, SR)
        
        # compute the short-time spectrum of the input and normalize it
        spec = stft(y_mixdown, n_fft=FFT_SIZE, hop_length=H, win_length=FFT_SIZE)
        mag = np.abs(spec)
        mag /= np.max(mag)
        phase = np.exp(1.j*np.angle(spec))
        print ('mag.shape', mag.shape)  
        start = 0
        end = 128 * (mag.shape[1] // 128)  # choose a value no larger than the number of input frames; it depends on the network definition
        # compute the mask used to separate the speech (vocal)
        mask = unet(mag[:, start:end][np.newaxis, np.newaxis, 1:, :]).data[0, 0, :, :]
        mask = np.vstack((np.zeros(mask.shape[1], dtype="float32"), mask))
        # apply the mask to the input spectrum and resynthesize the waveform with the inverse STFT
        mag2=mag[:, start:end]*mask 
        phase2=phase[:, start:end]
        y = istft(mag2*phase2, hop_length=H, win_length=FFT_SIZE)
        
        # save the separated speech (vocal) to an output file
        if len(path_output_ext)==0:
            # write into the output directory
            foutname, _ = os.path.splitext( os.path.basename(fmixdown) )
            fname= os.path.join(PATH_OUTPUT, (foutname + '.wav'))
        else:
            # write to the specified output file
            fname= PATH_OUTPUT
        print ('saving... ', fname)
        write_wav(fname, y, SR, norm=True)
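A hedged example of invoking separate(); the paths and model filename below are assumptions, not taken from the original script.

# hypothetical invocation; paths and model file are placeholders
separate(PATH_INPUT="./mixdown_wavs",
         PATH_OUTPUT="./separated_vocals",
         MODEL="unet.model",
         SR=16000)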
Example #23
    def __init__(self, file_path, meta_data, max_timestep=None):

        self.roots = file_path
        self.root_key = list(self.roots.keys())
        self.max_timestep = max_timestep
        self.dataset = []
        self.all_speakers = []

        for key in self.root_key:

            cache_path = f"./downstream/voxceleb2_amsoftmax/cache_wav_paths/cache_{key}.p"
            p = Path(self.roots[key])
            # load cache_path if the file exists
            if os.path.isfile(cache_path):

                # cache dict =
                #{"speaker_id1":["wav_a_path1","wav_a_path2",...],"speaker_id2":["wav_b_path1", "wav_b_path2", ....],...}
                cache_wavs_dict = pickle.load(open(cache_path, "rb"))
                self.all_speakers.extend(list(cache_wavs_dict.keys()))
                for speaker_id in list(cache_wavs_dict.keys()):
                    for wavs in cache_wavs_dict[speaker_id]:
                        self.dataset.append(str(p / speaker_id / wavs))

            else:

                speaker_wav_dict = {}
                # collect all speakers; black-listed (dev) speakers can be filtered out later
                speaker_dirs = [
                    f.path.split("/")[-1] for f in os.scandir(self.roots[key])
                    if f.is_dir()
                ]
                self.all_speakers.extend(speaker_dirs)

                print("search all wavs paths")
                start = time.time()

                for speaker in tqdm.tqdm(speaker_dirs):
                    speaker_dir = p / speaker
                    wav_list = find_files(speaker_dir)
                    speaker_wav_dict[speaker] = []
                    for wav in wav_list:
                        self.dataset.append(str(speaker_dir / wav))
                        speaker_wav_dict[speaker].append("/".join(
                            wav.split("/")[-2:]))
                end = time.time()
                print(f"search all wavs paths costs {end-start} seconds")
                print(
                    f"save wav paths to {cache_path}! so we can load all paths directly next time!"
                )
                pickle.dump(speaker_wav_dict, open(cache_path, "wb"))

        self.speaker_num = len(self.all_speakers)
        self.necessary_dict = self.processing()
        self.label_mapping_spk_id = {}
        # map speaker id to speaker index
        self.build_label_mapping()

        self.label = self.build_label(self.dataset)
def TrainConvnetExtractor(trainidx, epoch=20, saveas="convnet.model"):
    cqtfilelist = np.array(find_files(const.PATH_MIDIHCQT,
                                      ext="npz"))[trainidx]
    #midifilelist = find_files(const.PATH_MIDI,ext="mid")[:filecnt]
    config.train = True
    config.enable_backprop = True
    convnet = networks.FullCNNFeatExtractor()
    model = networks.ConvnetPredictor(convnet)
    model.to_gpu(0)
    opt = optimizers.AdaDelta()
    opt.setup(model)
    print("train set length: %d" % trainidx.size)
    print("start epochs...")
    S = []
    T = []

    for cqtfile in cqtfilelist:
        dat = np.load(cqtfile)
        spec = utils.PreprocessSpec(dat["spec"])[:const.CQT_H, :, :]
        targ = GetConvnetTargetFromPianoroll(dat["target"]).astype(np.int32)
        assert (spec.shape[1] == targ.shape[0])
        S.append(spec)
        T.append(targ)
    S = np.concatenate(S, axis=1)
    T = np.concatenate(T, axis=0)

    for ep in range(epoch):
        sum_loss = 0

        assert (S.shape[1] == T.shape[0])
        randidx = np.random.randint(0,
                                    S.shape[1] - const.CONV_TRAIN_SEQLEN - 1,
                                    S.shape[1] // const.CONV_TRAIN_SEQLEN * 4)
        for i in range(0, randidx.size - const.CONV_TRAIN_BATCH,
                       const.CONV_TRAIN_BATCH):
            x_batch = np.stack([
                S[:, randidx[j]:randidx[j] + const.CONV_TRAIN_SEQLEN, :]
                for j in range(i, i + const.CONV_TRAIN_BATCH)
            ])
            t_batch = np.stack([
                T[randidx[j]:randidx[j] + const.CONV_TRAIN_SEQLEN, :]
                for j in range(i, i + const.CONV_TRAIN_BATCH)
            ])
            x_in = cp.asarray(x_batch)
            t_in = cp.asarray(t_batch)
            model.cleargrads()
            loss = model(x_in, t_in)
            loss.backward()
            opt.update()
            sum_loss += loss.data

        convnet.save(saveas)
        print("epoch: %d/%d  loss:%.04f" %
              (ep + 1, epoch, sum_loss / const.CONV_TRAIN_BATCH))

    convnet.save(saveas)
Example #25
def getNoteTemplates(path_notes):
    list_templates = []
    list_noteaudio = find_files(path_notes, ext="wav")
    for noteaudio in list_noteaudio:
        S_mag = U.LoadAudio(noteaudio)
        init_H = np.ones((1, S_mag.shape[1]))
        template, activate = NMF.nmf_sklearn(S_mag, k=1, H=init_H, verbose=False)
        list_templates.append(template[:, 0] / np.max(template))
    templates = np.stack(list_templates)
    return templates
Example #26
def reload_data(path_to_features, part):
    matfiles = find_files(path_to_mat + part + '/', ext='mat')
    for i in range(len(matfiles)):
        if matfiles[i][len(path_to_mat) + len(part) + 1:].startswith('LFCC'):
            key = matfiles[i][len(path_to_mat) + len(part) + 6:-4]
            lfcc = sio.loadmat(matfiles[i],
                               verify_compressed_data_integrity=False)['x']
            with open(path_to_features + part + '/' + key + 'LFCC.pkl',
                      'wb') as handle2:
                pickle.dump(lfcc, handle2, protocol=pickle.HIGHEST_PROTOCOL)
def get_filenames(dir):

    files = find_files(dir, ext='npy')
    filenames = []

    for f in files:
        f = os.path.basename(f)[:-4]
        filenames.append(f)

    return filenames
Example #28
def shuffleCQT_baka():
    TORIDASUBASHO = PATH_hrCQT
    HOZONBASYO = PATH_SAME2
    HOZONBASYO2 = PATH_SAME3

    OVTONE = 2

    audiolist = find_files(PATH_AUDIO, ext="wav")
    cqtlist = find_files(TORIDASUBASHO, ext="npy")
    itemlist = len(audiolist)
    i_counter = 1
    for audiofile, cqtfile in zip(audiolist, cqtlist):
        print("{}/{}".format(i_counter, itemlist))
        filename = audiofile.split('/')[-1]
        albname = audiofile.split('/')[-2]
        # foldname = audiofile.split('/')[-3]
        cqt_filename = cqtfile.split("/")[-1]
        if (filename.split(".")[0] != cqt_filename.split(".")[0]):
            print("file_not_match", filename, cqt_filename)


# ディレクトリチェック

        if not (os.path.exists(HOZONBASYO + '/' + albname)):
            os.makedirs(HOZONBASYO + '/' + albname)
        if not (os.path.exists(HOZONBASYO + '/' + albname + '/' + filename +
                               '.npy')):
            cqt = np.load(cqtfile)
            excqt = np.vstack((cqt, cqt))
            np.save(HOZONBASYO + '/' + albname + '/' + filename + '.npy',
                    np.array(excqt, dtype="float32"))

        if not (os.path.exists(HOZONBASYO2 + '/' + albname)):
            os.makedirs(HOZONBASYO2 + '/' + albname)
        if not (os.path.exists(HOZONBASYO2 + '/' + albname + '/' + filename +
                               '.npy')):
            cqt = np.load(cqtfile)
            excqt = np.vstack((cqt, cqt, cqt))
            np.save(HOZONBASYO2 + '/' + albname + '/' + filename + '.npy',
                    np.array(excqt, dtype="float32"))

        i_counter += 1
    def __init__(self, roots, sample_rate, max_time, target_level, noise_proportion, snrs, **kwargs):
        self.sample_rate = sample_rate
        self.max_time = max_time
        self.target_level = target_level
        self.noise_proportion = noise_proportion
        self.snrs = snrs
        self.filepths = []
        for root in roots:
            self.filepths += find_files(root)
        assert len(self.filepths) > 0, 'No audio file detected'
        self.noise_sampler = torch.distributions.Normal(0, 1)
Example #30
def LoadSpectrogram(target="vocal") :
    filelist = find_files('./Spectrogram', ext="npz")
    x_list = []
    y_list = []
    for file in filelist :
        data = np.load(file)
        x_list.append(data['mix'])
        if target == "vocal" :
            y_list.append(data['vocal'])
        else :
            y_list.append(data['inst'])
    return x_list, y_list
def LoadDataset(target="vocal"):
    filelist_fft = find_files(C.PATH_FFT, ext="npz")[:200]
    Xlist = []
    Ylist = []
    for file_fft in filelist_fft:
        dat = np.load(file_fft)
        Xlist.append(dat["mix"])
        if target == "vocal":
            assert(dat["mix"].shape == dat["vocal"].shape)
            Ylist.append(dat["vocal"])
        else:
            assert(dat["mix"].shape == dat["inst"].shape)
            Ylist.append(dat["inst"])
    return Xlist, Ylist
Example #33
# -*- coding: utf-8 -*-
"""
Created on Sat May  7 16:45:29 2016

@author: parallels
"""

import functions
from librosa.util import find_files
from librosa.core import load

audiofilelist = find_files("database/audios/",ext = "wav")
print "saving peaks...."
for audiofile in audiofilelist:
    y,sr = load(audiofile)
    filename = audiofile.split("/")[-1]+".npy" # -1 means the last name of the directory
    functions.save_maximum_array(y,filename)
    print "saved:" + filename
"""
Created on Fri Nov  3 10:59:08 2017

@author: wuyiming
"""

import os
from librosa.core import load
from librosa.util import find_files
import yaml
import util


PATH_MENDLEY = "MedleyDB/Audio"

metadatalist = find_files(PATH_MENDLEY, ext="yaml")

all_voctracks = []
all_insttracks = []

for metafile in metadatalist:
    print("YAML file: %s" % metafile)
    songname = metafile.split("/")[-2]
    print("song: %s" % songname)
    with open(metafile, "r+") as f:
        data = yaml.load(f)

    if data["instrumental"] != "no":
        print("Instrumental track. Skipped.")
        continue