def predict_label(self, audio1_filepath, audio2_filepath, threshold=0.5):
    """Predict whether two audio files contain the same speaker.

    Embeddings are computed once per file and memoized in
    ``self.filename2embedding`` so repeated trials on the same file
    do not recompute them.

    Parameters
    ----------
    audio1_filepath : str
        Path to the first audio file.
    audio2_filepath : str
        Path to the second audio file.
    threshold : float, optional
        Cosine-distance decision threshold. Defaults to 0.5, the value
        the original implementation hard-coded.

    Returns
    -------
    int
        1 if the cosine distance between the two embeddings is below
        ``threshold`` (predicted: same speaker), 0 otherwise.
    """

    def _embedding(filepath, uri):
        # Cache-or-compute: the sliding-window embeddings returned by
        # self.engine are averaged into a single (1, dim) row so that
        # cdist receives 2-D input.
        if filepath not in self.filename2embedding:
            self.filename2embedding[filepath] = np.mean(
                self.engine({"uri": uri, "audio": filepath}),
                axis=0, keepdims=True)
        return self.filename2embedding[filepath]

    audio1_embedding = _embedding(audio1_filepath, "audio1uri")
    audio2_embedding = _embedding(audio2_filepath, "audio2uri")

    dist = cdist(audio1_embedding, audio2_embedding, metric="cosine")[0][0]
    return 1 if dist < threshold else 0
def __call__(self, X_target, X):
    """Assign each sample to its closest target (if close enough).

    Parameters
    ----------
    X_target : `np.ndarray`
        (n_targets, n_dimensions) target embeddings
    X : `np.ndarray`
        (n_samples, n_dimensions) sample embeddings

    Returns
    -------
    assignments : `np.ndarray`
        (n_samples, ) index of the closest target for each sample.
        Samples whose distance to their closest target exceeds
        `self.threshold` are left unassigned and encoded as the
        negative value -(i + 1), where i is the sample index.
    """
    if self.normalize:
        X_target = l2_normalize(X_target)
        X = l2_normalize(X)

    # distance[k, i] = distance between target k and sample i
    distance = cdist(X_target, X, metric=self.metric)
    targets = np.argmin(distance, axis=0)

    for i, k in enumerate(targets):
        if distance[k, i] > self.threshold:
            # Do not assign: encode "unassigned" as -(i + 1).
            # The original code used -i, which silently fails for
            # i == 0 because -0 == 0 is a valid target index, so the
            # first sample could never be marked unassigned.
            targets[i] = -(i + 1)

    return targets
def eval(self, model, partition: str = 'development'):
    """Score `model` on the verification trials of `partition`.

    Each trial file is embedded at most once (memoized in `cache` via
    `self._file_embedding`), trials are scored by the distance between
    the two embeddings, and the equal error rate is derived from the
    resulting score/label pairs.

    Returns
    -------
    tuple
        ``(1 - eer, y_pred, y_true)`` — the EER complement is returned
        because the evaluator keeps track of the highest metric value.
    """
    model.eval()

    embedder = SequenceEmbedding(
        model=model,
        feature_extraction=self.config.feature_extraction,
        duration=self.config.duration,
        step=.5 * self.config.duration,
        batch_size=self.batch_size,
        device=common.DEVICE)

    protocol = get_protocol(self.config.protocol_name,
                            progress=False,
                            preprocessors=self.config.preprocessors)

    metric_name = self.distance.to_sklearn_metric()
    cache = {}
    y_true, y_pred = [], []

    for trial in getattr(protocol, f"{partition}_trial")():
        # Embed both sides of the trial (cached per file).
        left = self._file_embedding(trial['file1'], embedder, cache)
        right = self._file_embedding(trial['file2'], embedder, cache)

        # Single-row embeddings, so [0, 0] picks the scalar distance.
        y_pred.append(cdist(left, right, metric=metric_name)[0, 0])
        y_true.append(trial['reference'])

    _, _, _, eer = det_curve(np.array(y_true), np.array(y_pred),
                             distances=True)

    # Returning 1-eer because the evaluator keeps track of the highest
    # metric value
    return 1 - eer, y_pred, y_true
# NOTE(review): this is a fragment of a larger script — `row`, `i`, `pbar`,
# `df_test_sub`, `emb`, `expt_root`, `expt_name`, `audio1_filepath` and
# `audio1_embedding` are defined outside the visible chunk. The statements up
# to `pbar.update()` presumably sit inside a loop over `df_test_sub` rows,
# with the pickle dump and CSV export after the loop — confirm against the
# full file before relying on this layout.

# Build the path to the second audio file of the current trial pair.
audio2_filepath = os.path.join(
    expt_root, "Train-Test-Data/public-test/") + str(row["audio_2"])
# Cache-or-compute the embedding: average the per-window embeddings into a
# single (1, dim) row so cdist below receives 2-D input.
if audio2_filepath in filename2embedding:
    audio2_embedding = filename2embedding[audio2_filepath]
else:
    audio2_embedding = np.mean(emb({
        "uri": row["audio_2"],
        "audio": audio2_filepath
    }), axis=0, keepdims=True)
    filename2embedding[audio2_filepath] = audio2_embedding
# X_audio1 = l2_normalize(np.array([audio1_embedding,]))
# X_audio2 = l2_normalize(np.array([audio2_embedding,]))
# Cosine distance between the two cached embeddings for this trial.
distance = cdist(audio1_embedding, audio2_embedding, metric="cosine")
#if (i % 1000) == 0:
#    print(f"Distance is {distance[0][0]} for index {i} ")
dist = distance[0][0]
# Record the score and the resolved file paths on the submission frame.
df_test_sub.loc[i, "dist"] = dist
df_test_sub.loc[i, "audio1_filepath"] = audio1_filepath
df_test_sub.loc[i, "audio2_filepath"] = audio2_filepath
pbar.update()
# Persist the embedding cache so future runs can skip recomputation.
with open(f"{expt_name}embedding_public.pickle", 'wb') as handle:
    pickle.dump(filename2embedding, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Export the scored trials.
df_test_sub.to_csv(f"{expt_name}_df_test_sub.csv", index=False)
def _validate_epoch_verification(
    self,
    epoch,
    validation_data,
    protocol=None,
    subset: Subset = "development",
    device: Optional[torch.device] = None,
    batch_size: int = 32,
    n_jobs: int = 1,
    duration: float = None,
    step: float = 0.25,
    metric: str = None,
    **kwargs,
):
    """Compute the equal error rate of `epoch`'s model on `subset` trials.

    Parameters
    ----------
    epoch : int
        Epoch whose pretrained weights are validated.
    validation_data : unused here, kept for interface compatibility.
    protocol : str
        Name of the verification protocol.
    subset : Subset
        Protocol subset whose `{subset}_trial` generator is iterated.
    device, batch_size, duration, step : passed to `Pretrained`.
    metric : str
        Distance metric name passed to `scipy.spatial.distance.cdist`.

    Returns
    -------
    dict
        ``{"metric": "equal_error_rate", "minimize": True, "value": eer}``.
    """
    # initialize embedding extraction
    pretrained = Pretrained(
        validate_dir=self.validate_dir_,
        epoch=epoch,
        duration=duration,
        step=step,
        batch_size=batch_size,
        device=device,
    )

    # Work on a copy: the original assigned `preprocessors =
    # self.preprocessors_` and then mutated it, leaking the defaults
    # added below into the instance's own preprocessor mapping.
    preprocessors = dict(self.preprocessors_)
    if "audio" not in preprocessors:
        preprocessors["audio"] = FileFinder()
    if "duration" not in preprocessors:
        preprocessors["duration"] = get_audio_duration
    _protocol = get_protocol(protocol, preprocessors=preprocessors)

    cache = {}

    def _embedding(current_file):
        # Compute each file's embedding at most once per validation run,
        # keyed by the file's hash.
        file_hash = self.get_hash(current_file)
        if file_hash not in cache:
            cache[file_hash] = self.get_embedding(current_file, pretrained)
        return cache[file_hash]

    y_true, y_pred = [], []
    for trial in getattr(_protocol, f"{subset}_trial")():
        emb1 = _embedding(trial["file1"])
        emb2 = _embedding(trial["file2"])

        # compare embeddings: single-row inputs, so [0, 0] is the scalar
        distance = cdist(emb1, emb2, metric=metric)[0, 0]
        y_pred.append(distance)
        y_true.append(trial["reference"])

    _, _, _, eer = det_curve(np.array(y_true), np.array(y_pred),
                             distances=True)

    return {
        "metric": "equal_error_rate",
        "minimize": True,
        "value": float(eer),
    }
def _validate_epoch_verification(self, epoch, validation_data,
                                 protocol=None,
                                 subset='development',
                                 device: Optional[torch.device] = None,
                                 batch_size: int = 32,
                                 n_jobs: int = 1,
                                 duration: float = None,
                                 step: float = 0.25,
                                 metric: str = None,
                                 **kwargs):
    """Validate one epoch on a speaker verification protocol.

    Each trial file is embedded at most once (memoized by file hash),
    every trial is scored with the `metric` distance between its two
    embeddings, and the resulting scores yield the equal error rate.

    Returns
    -------
    dict
        ``{'metric': 'equal_error_rate', 'minimize': True, 'value': eer}``.
    """
    # initialize embedding extraction
    pretrained = Pretrained(validate_dir=self.validate_dir_,
                            epoch=epoch,
                            duration=duration,
                            step=step,
                            batch_size=batch_size,
                            device=device)

    _protocol = get_protocol(protocol, progress=False,
                             preprocessors=self.preprocessors_)

    cache = {}

    def embed(current_file):
        # one embedding per unique file, memoized by its hash
        key = self.get_hash(current_file)
        if key not in cache:
            cache[key] = self.get_embedding(current_file, pretrained)
        return cache[key]

    y_true, y_pred = [], []
    for trial in getattr(_protocol, '{0}_trial'.format(subset))():
        # score the trial: single-row embeddings, so [0, 0] is the scalar
        distance = cdist(embed(trial['file1']),
                         embed(trial['file2']),
                         metric=metric)[0, 0]
        y_pred.append(distance)
        y_true.append(trial['reference'])

    _, _, _, eer = det_curve(np.array(y_true), np.array(y_pred),
                             distances=True)

    return {
        'metric': 'equal_error_rate',
        'minimize': True,
        'value': float(eer)
    }