Example #1
    def enrollment_dic_kwsTrials(self,
                                 listfilename,
                                 uttpath,
                                 utt2label,
                                 save_path,
                                 print_interval=10,
                                 num_eval=10,
                                 eval_frames=None,
                                 save_dic=False):

        self.eval()

        lines = []
        enroll_utt = []
        feats = {}
        tstart = time.time()

        ## Read all lines
        with open(listfilename) as listfile:
            lines = listfile.readlines()

        for line in lines:
            data = line.split()
            enroll_utt.append(data[0])
            enroll_utt.append(data[1])
            enroll_utt.append(data[2])
        set_enroll_utt = list(set(enroll_utt))
        set_enroll_utt.sort()

        ##extract enrollment data embeddings
        for idx, uttid in enumerate(set_enroll_utt):
            with torch.no_grad():
                inp = torch.FloatTensor(loadWAV(uttpath + uttid, 0, True,
                                                10)).cuda()

                embd = self.__S__.forward(inp).cpu()

            feats[uttid] = embd
            telapsed = time.time() - tstart
            if idx % print_interval == 0:
                sys.stdout.write(
                    "\rReading %d of %d: %.2f Hz, embedding size %d" %
                    (idx, len(set_enroll_utt), idx / telapsed, embd.size()[1]))
        feats_np = {}
        for utt in feats:
            feats_np[utt] = feats[utt].numpy()
        if save_dic:
            savenpy_path = save_path
            numpy.save(savenpy_path, feats_np)

        end = time.time() - tstart
        print("\n total time %.2f" % (end))
        return feats_np
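When save_dic is enabled, the embeddings dictionary is written with numpy.save, which wraps the dict in a 0-d object array. A minimal sketch of reading it back; the file name is illustrative and assumes save_path ended in '.npy' (NumPy appends the suffix otherwise):

import numpy

# allow_pickle is required because the saved object is a Python dict;
# .item() unwraps the 0-d object array back into that dict.
enroll_dic = numpy.load('enroll_embeddings.npy', allow_pickle=True).item()
print(len(enroll_dic), 'enrollment embeddings loaded')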
Example #2
def get_embeddings(model, test_list, test_path, max_frames=1000):
    """2D scatter plot
    
    Parameters
    ----------
    model : torch.nn.Module
        The loaded model.
        
    test_list : str
        The list of wav file names.
        
    test_path : str
        The wav file location.
        
    max_frames : int
        The max number of frames to process per utterance
        
    Returns
    -------
    embeddings : torch.Tensor
        Stacked speaker embeddings per utterance. Each row is an embedding.
        
    labels : list
        List of str. Corresponding speaker IDs.
    """
    # TODO(alexbooth): what does num_eval do?
    num_eval = 10

    wav_paths = parse_test_list(test_list)

    embeddings = None
    labels = []

    for idx, wav_path in enumerate(wav_paths):
        input_ = loadWAV(os.path.join(test_path, wav_path),
                         max_frames,
                         evalmode=True,
                         num_eval=num_eval).to(device)
        output = model.forward(input_).detach().cpu()

        if embeddings is None:
            embeddings = output.view(-1).unsqueeze(0)
        else:
            output = output.view(-1).unsqueeze(0)
            embeddings = torch.cat([embeddings, output])

        labels.append(wav_path.split('/')[0])

    return embeddings, labels
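The docstring frames these embeddings as input to a 2D scatter plot. One possible visualisation is sketched below; it assumes scikit-learn and matplotlib, neither of which appears in the snippet above, so treat it as an illustration rather than the original plotting code.

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

embeddings, labels = get_embeddings(model, test_list, test_path)

# Project the embedding matrix down to two dimensions and colour points by speaker.
points = PCA(n_components=2).fit_transform(embeddings.numpy())
for speaker in sorted(set(labels)):
    rows = [i for i, lab in enumerate(labels) if lab == speaker]
    plt.scatter(points[rows, 0], points[rows, 1], s=8, label=speaker)
plt.legend(fontsize='small')
plt.show()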
Example #3
def loadAudio(file):
    audio = loadWAV(file, args.eval_frames, evalmode=True)
    return torch.FloatTensor(audio)
Example #4
    def evaluateFromList(self,
                         listfilename,
                         print_interval=100,
                         test_path='',
                         num_eval=10,
                         eval_frames=None):

        self.eval()

        lines = []
        files = []
        feats = {}
        tstart = time.time()

        ## Read all lines
        with open(listfilename) as listfile:
            while True:
                line = listfile.readline()
                if (not line):
                    break

                data = line.split()

                ## Append random label if missing
                if len(data) == 2: data = [random.randint(0, 1)] + data

                files.append(data[1])
                files.append(data[2])
                lines.append(line)

        setfiles = list(set(files))
        setfiles.sort()

        ## Save all features to file
        for idx, file in enumerate(setfiles):

            inp1 = loadWAV(os.path.join(test_path, file),
                           eval_frames,
                           evalmode=True,
                           num_eval=num_eval).to(device)

            ref_feat = self.__S__.forward(inp1).detach().cpu()

            filename = '%06d.wav' % idx

            feats[file] = ref_feat

            telapsed = time.time() - tstart

            if idx % print_interval == 0:
                sys.stdout.write(
                    "\rReading %d of %d: %.2f Hz, embedding size %d" %
                    (idx, len(setfiles), idx / telapsed, ref_feat.size()[1]))

        print('')
        all_scores = []
        all_labels = []
        all_trials = []
        tstart = time.time()

        ## Read files and compute all scores
        for idx, line in enumerate(lines):

            data = line.split()

            ## Append random label if missing
            if len(data) == 2: data = [random.randint(0, 1)] + data

            ref_feat = feats[data[1]].to(device)
            com_feat = feats[data[2]].to(device)

            if self.__L__.test_normalize:
                ref_feat = F.normalize(ref_feat, p=2, dim=1)
                com_feat = F.normalize(com_feat, p=2, dim=1)

            dist = F.pairwise_distance(ref_feat.unsqueeze(-1),
                                       com_feat.unsqueeze(-1).transpose(
                                           0, 2)).detach().cpu().numpy()

            score = -1 * numpy.mean(dist)

            all_scores.append(score)
            all_labels.append(int(data[0]))
            all_trials.append(data[1] + " " + data[2])

            if idx % print_interval == 0:
                telapsed = time.time() - tstart
                sys.stdout.write("\rComputing %d of %d: %.2f Hz" %
                                 (idx, len(lines), idx / telapsed))
                sys.stdout.flush()

        print('\n')

        return (all_scores, all_labels, all_trials)
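The returned (all_scores, all_labels, all_trials) triple is typically fed into an equal error rate (EER) computation; a later example reads an 'eer_threshold' from a parameter dict. A minimal EER helper, assuming scikit-learn is available (this function is not part of the original code, and the commented call uses placeholder file names):

import numpy
from sklearn.metrics import roc_curve

def compute_eer(scores, labels):
    # The EER is the operating point where the false-acceptance rate (fpr)
    # equals the false-rejection rate (fnr).
    fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
    fnr = 1 - tpr
    idx = numpy.nanargmin(numpy.abs(fnr - fpr))
    return (fpr[idx] + fnr[idx]) / 2, thresholds[idx]

# scores, labels, trials = s.evaluateFromList('veri_test.txt', test_path='wav/')
# eer, eer_threshold = compute_eer(scores, labels)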
Example #5
    def evaluateFromListSave(self,
                             listfilename,
                             print_interval=5000,
                             feat_dir='',
                             test_path='',
                             num_eval=10):

        self.eval()
        
        lines       = []
        files       = []
        filedict    = {}
        feats       = {}
        tstart      = time.time()

        if feat_dir != '':
            print('Saving temporary files to %s'%feat_dir)
            if not(os.path.exists(feat_dir)):
                os.makedirs(feat_dir)

        ## Read all lines
        with open(listfilename) as listfile:
            while True:
                line = listfile.readline()
                if (not line):  # or (len(all_scores)==1000)
                    break

                data = line.split()

                files.append(data[1])
                files.append(data[2])
                lines.append(line)

        setfiles = list(set(files))
        setfiles.sort()

        ## Save all features to file
        for idx, file in enumerate(setfiles):

            inp1 = loadWAV(os.path.join(test_path, file),
                           self.__max_frames__,
                           evalmode=True,
                           num_eval=num_eval).cuda()

            ref_feat = self.__S__.forward(inp1).detach().cpu()

            filename = '%06d.wav'%idx

            if feat_dir == '':
                feats[file]     = ref_feat
            else:
                filedict[file]  = filename
                torch.save(ref_feat,os.path.join(feat_dir,filename))

            telapsed = time.time() - tstart

            if idx % print_interval == 0:
                sys.stdout.write("\rReading %d: %.2f Hz, embed size %d"%(idx,idx/telapsed,ref_feat.size()[1]));

        print('')
        all_scores = []
        all_labels = []
        tstart = time.time()

        ## Read files and compute all scores
        for idx, line in enumerate(lines):

            data = line.split()

            if feat_dir == '':
                ref_feat = feats[data[1]].cuda()
                com_feat = feats[data[2]].cuda()
            else:
                ref_feat = torch.load(os.path.join(feat_dir,filedict[data[1]])).cuda()
                com_feat = torch.load(os.path.join(feat_dir,filedict[data[2]])).cuda()

            if self.__test_normalize__:
                ref_feat = F.normalize(ref_feat, p=2, dim=1)
                com_feat = F.normalize(com_feat, p=2, dim=1)

            dist = F.pairwise_distance(
                ref_feat.unsqueeze(-1).expand(-1, -1, num_eval),
                com_feat.unsqueeze(-1).expand(-1, -1, num_eval).transpose(
                    0, 2)).detach().cpu().numpy()

            score = -1 * numpy.mean(dist)

            all_scores.append(score)
            all_labels.append(int(data[0]))

            if idx % print_interval == 0:
                telapsed = time.time() - tstart
                sys.stdout.write("\rComputing %d: %.2f Hz"%(idx,idx/telapsed));
                sys.stdout.flush();

        if feat_dir != '':
            print(' Deleting temporary files.')
            shutil.rmtree(feat_dir)

        print('\n')

        return (all_scores, all_labels)
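The scoring step above tiles the num_eval crop embeddings of both utterances so that F.pairwise_distance produces every crop-to-crop distance, and the trial score is the negative mean of those distances. The same idea can be written more directly with torch.cdist; this is an assumed-equivalent reformulation, not the original code:

import torch
import torch.nn.functional as F

def trial_score(ref_feat, com_feat):
    # ref_feat, com_feat: (num_eval, D) embeddings of the two utterances.
    ref = F.normalize(ref_feat, p=2, dim=1)
    com = F.normalize(com_feat, p=2, dim=1)
    dist = torch.cdist(ref, com)  # (num_eval, num_eval) pairwise L2 distances
    return -dist.mean().item()    # higher (less negative) means more similar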
Example #6
    threshold = parameter_dic['eer_threshold']
    if len(lines[0].strip().split()) == 5:  # on dev set
        print('Deal with dev set')
        tstart = time.time()
        for line in tqdm.tqdm(lines):
            data = line.strip().split()
            final_labels.append(data[4])
            if utt2label[data[3]] == 'non-trigger':
                output_score.append('negative')
                continue
            else:
                with torch.no_grad():
                    uttid = data[3] + '.wav'
                    if uttid not in eval_dic:
                        inp = torch.FloatTensor(
                            loadWAV(args.uttpath + uttid, 0, True, 10)).cuda()
                        eval_embd = s.__S__.forward(inp).cpu().numpy()
                        eval_dic[uttid] = eval_embd
                    else:
                        eval_embd = eval_dic[uttid]
                eval_embd = numpy.squeeze(eval_embd)
                enroll_embd = (enroll_dic[data[0]] + enroll_dic[data[1]] +
                               enroll_dic[data[2]]) / 3
                enroll_embd = numpy.squeeze(enroll_embd)
                result = 1 - spatial.distance.cosine(eval_embd, enroll_embd)
                if result < threshold:
                    output_score.append('negative')
                else:
                    output_score.append('positive')
        tend = time.time() - tstart
        print('total time: %.2f' % (tend))
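The decision rule above averages three enrollment embeddings and thresholds the cosine similarity with the evaluation embedding. A self-contained sketch of just that step; the helper name is illustrative and not part of the original code:

import numpy
from scipy import spatial

def score_trial(enroll_embds, eval_embd, threshold):
    # Average the enrollment embeddings, then compare by cosine similarity.
    enroll = numpy.mean(numpy.stack([numpy.squeeze(e) for e in enroll_embds]),
                        axis=0)
    sim = 1 - spatial.distance.cosine(numpy.squeeze(eval_embd), enroll)
    return ('positive' if sim >= threshold else 'negative'), sim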
Example #7
    tsatrt = time.time()
    for line in tqdm.tqdm(lines):
        # Only use the first 40000 lines of the trial file; the remaining lines contain stitched (concatenated) audio
        data = line.strip().split()
        final_labels.append(data[4])
        if utt2label[data[3]] == 'negative':
            output_score.append('negative')
            continue
        elif (utt2label[data[3]] == 'trigger'
              and u2l_template[data[3]] == 'positive'):
            with torch.no_grad():
                uttid = data[3] + '.wav'
                if uttid not in eval_dic:
                    inp = torch.FloatTensor(
                        loadWAV(args.uttpath + uttid, 0, True, 10)).cuda()
                    eval_embd = s.__S__.forward(inp).cpu().numpy()
                    eval_dic[uttid] = eval_embd
                else:
                    eval_embd = eval_dic[uttid]
            eval_embd = numpy.squeeze(eval_embd)
            enroll_embd = (enroll_dic[data[0]] + enroll_dic[data[1]] +
                           enroll_dic[data[2]]) / 3
            enroll_embd = numpy.squeeze(enroll_embd)
            result = 1 - spatial.distance.cosine(eval_embd, enroll_embd)
            scores.append(result)
            if data[4] == 'negative':
                labels.append(0)
            else:
                labels.append(1)
            output_score.append('tbd')
Example #8
    def evaluateFromList(self,
                         wav1,
                         wav2,
                         print_interval=100,
                         test_path='',
                         num_eval=10,
                         eval_frames=None):

        self.eval()

        lines = []
        files = []
        feats = {}
        tstart = time.time()

        lines = [wav1, wav2]
        setfiles = list(set(lines))
        setfiles.sort()

        ## Save all features to file
        for idx, file in enumerate(setfiles):

            inp1 = torch.FloatTensor(
                loadWAV(os.path.join(test_path, file),
                        eval_frames,
                        evalmode=True,
                        num_eval=num_eval)).cuda()

            ref_feat = self.__S__.forward(inp1).detach().cpu()

            filename = '%06d.wav' % idx

            feats[file] = ref_feat

            telapsed = time.time() - tstart

            if idx % print_interval == 0:
                sys.stdout.write(
                    "\rReading %d of %d: %.2f Hz, embedding size %d" %
                    (idx, len(setfiles), idx / telapsed, ref_feat.size()[1]))

        print('')

        tstart = time.time()

        ## Read files and compute all scores
        idx = 0

        ref_feat = feats[wav1].cuda()
        com_feat = feats[wav2].cuda()

        if self.__L__.test_normalize:
            ref_feat = F.normalize(ref_feat, p=2, dim=1)
            com_feat = F.normalize(com_feat, p=2, dim=1)

        dist = F.pairwise_distance(ref_feat.unsqueeze(-1),
                                   com_feat.unsqueeze(-1).transpose(
                                       0, 2)).detach().cpu().numpy()

        score = -1 * numpy.mean(dist)

        trial = wav1 + " " + wav2

        if idx % print_interval == 0:
            telapsed = time.time() - tstart
            sys.stdout.write("\rComputing %d of %d: %.2f Hz" %
                             (idx, len(lines), idx / telapsed))
            sys.stdout.flush()

        print('\n')

        return (score, trial)
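A hypothetical call to this pair-scoring variant; the wrapper object, file names and eval_frames value are placeholders, not taken from the original code:

# Assumes `s` wraps the speaker model, as in the other examples.
score, trial = s.evaluateFromList('spk1/utt_a.wav', 'spk2/utt_b.wav',
                                  test_path='wav/', eval_frames=400)
print(trial, score)  # a less negative score means the utterances are closer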
Example #9
    def evaluateFromList(self,
                         listfilename,
                         print_interval=100,
                         test_path='',
                         num_eval=0,
                         eval_frames=None,
                         step=0.2,
                         save_path="./"):

        self.eval()

        lines = []
        files = []
        feats = {}
        tstart = time.time()

        ## Read all lines
        with open(listfilename) as listfile:
            while True:
                line = listfile.readline()
                if (not line):
                    break

                data = line.strip()

                ## Append random label if missing
                # if len(data) == 2: data = [random.randint(0,1)] + data

                files.append(data)
                # files.append(data[2])
                lines.append(line)

        setfiles = list(set(files))
        setfiles.sort()

        ## Save all features to file
        for idx, file in enumerate(setfiles):
            wavs = loadWAV(os.path.join(test_path, file + '.wav'),
                           eval_frames,
                           evalmode=True,
                           num_eval=num_eval,
                           step=step)

            print('wavs size', len(wavs))
            res = []
            for c in chunks(wavs, 20):
                # numpy.float was removed in NumPy 1.24; float32 matches torch.FloatTensor
                c = numpy.stack(c, axis=0).astype(numpy.float32)
                inp1 = torch.FloatTensor(c).to(device)

                ref_feat = self.__S__.forward(inp1).detach().cpu()
                res.append(ref_feat)

            res = torch.cat(res)

            # import pickle
            # pickle.dump(res,  open( f"{file}.p", "wb" ) )

            with open(f'{save_path}/{file}.npy', 'wb') as f:
                numpy.save(f, res)

            filename = '%06d.wav' % idx

            feats[file] = res  # cache the embeddings of all chunks for this file

            telapsed = time.time() - tstart

            if idx % print_interval == 0:
                sys.stdout.write(
                    "\rReading %d of %d: %.2f Hz, embedding size %d" %
                    (idx, len(setfiles), idx / telapsed, ref_feat.size()[1]))

        return  # NOTE: the scoring loop below is unreachable because of this early return
        all_scores = []
        all_labels = []
        all_trials = []
        tstart = time.time()

        ## Read files and compute all scores
        for idx, line in enumerate(lines):

            data = line.split()

            ## Append random label if missing
            if len(data) == 2: data = [random.randint(0, 1)] + data

            ref_feat = feats[data[1]].to(device)
            com_feat = feats[data[2]].to(device)

            if self.__L__.test_normalize:
                ref_feat = F.normalize(ref_feat, p=2, dim=1)
                com_feat = F.normalize(com_feat, p=2, dim=1)

            dist = F.pairwise_distance(ref_feat.unsqueeze(-1),
                                       com_feat.unsqueeze(-1).transpose(
                                           0, 2)).detach().cpu().numpy()

            score = -1 * numpy.mean(dist)

            all_scores.append(score)
            all_labels.append(int(data[0]))
            all_trials.append(data[1] + " " + data[2])

            if idx % print_interval == 0:
                telapsed = time.time() - tstart
                sys.stdout.write("\rComputing %d of %d: %.2f Hz" %
                                 (idx, len(lines), idx / telapsed))
                sys.stdout.flush()

        print('\n')

        return (all_scores, all_labels, all_trials)
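The per-utterance arrays written by the extraction loop above (one f'{save_path}/{file}.npy' per list entry) can be reloaded later for scoring; a minimal sketch reusing those names, with the concrete path left as a placeholder:

import numpy

# Each saved array holds one embedding row per evaluation window of that file.
emb = numpy.load(f'{save_path}/{file}.npy')
print(emb.shape)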