def predict_label(self, audio1_filepath, audio2_filepath, threshold=0.5):
    """Predict whether two audio files contain the same speaker.

    Embeddings are computed once per file and memoized in
    ``self.filename2embedding`` so repeated trials on the same file
    do not recompute them.

    Parameters
    ----------
    audio1_filepath : str
        Path to the first audio file.
    audio2_filepath : str
        Path to the second audio file.
    threshold : float, optional
        Cosine-distance decision threshold. Defaults to 0.5, the value
        the original implementation hard-coded.

    Returns
    -------
    int
        1 if the cosine distance between the two embeddings is below
        ``threshold`` (predicted: same speaker), 0 otherwise.
    """

    def _embedding(filepath, uri):
        # Cache-or-compute: the sliding-window embeddings returned by
        # self.engine are averaged into a single (1, dim) row so that
        # cdist receives 2-D input.
        if filepath not in self.filename2embedding:
            self.filename2embedding[filepath] = np.mean(
                self.engine({"uri": uri, "audio": filepath}),
                axis=0, keepdims=True)
        return self.filename2embedding[filepath]

    audio1_embedding = _embedding(audio1_filepath, "audio1uri")
    audio2_embedding = _embedding(audio2_filepath, "audio2uri")

    dist = cdist(audio1_embedding, audio2_embedding, metric="cosine")[0][0]
    return 1 if dist < threshold else 0
def __call__(self, X_target, X):
    """Assign each sample to its closest target (if close enough).

    Parameters
    ----------
    X_target : `np.ndarray`
        (n_targets, n_dimensions) target embeddings
    X : `np.ndarray`
        (n_samples, n_dimensions) sample embeddings

    Returns
    -------
    assignments : `np.ndarray`
        (n_samples, ) index of the closest target for each sample.
        Samples whose distance to their closest target exceeds
        `self.threshold` are left unassigned and encoded as the
        negative value -(i + 1), where i is the sample index.
    """
    if self.normalize:
        X_target = l2_normalize(X_target)
        X = l2_normalize(X)

    # distance[k, i] = distance between target k and sample i
    distance = cdist(X_target, X, metric=self.metric)
    targets = np.argmin(distance, axis=0)

    for i, k in enumerate(targets):
        if distance[k, i] > self.threshold:
            # Do not assign: encode "unassigned" as -(i + 1).
            # The original code used -i, which silently fails for
            # i == 0 because -0 == 0 is a valid target index, so the
            # first sample could never be marked unassigned.
            targets[i] = -(i + 1)

    return targets
def eval(self, model, partition: str = 'development'):
    """Score `model` on the verification trials of `partition`.

    Each trial file is embedded at most once (memoized in `cache` via
    `self._file_embedding`), trials are scored by the distance between
    the two embeddings, and the equal error rate is derived from the
    resulting score/label pairs.

    Returns
    -------
    tuple
        ``(1 - eer, y_pred, y_true)`` — the EER complement is returned
        because the evaluator keeps track of the highest metric value.
    """
    model.eval()

    embedder = SequenceEmbedding(
        model=model,
        feature_extraction=self.config.feature_extraction,
        duration=self.config.duration,
        step=.5 * self.config.duration,
        batch_size=self.batch_size,
        device=common.DEVICE)

    protocol = get_protocol(self.config.protocol_name,
                            progress=False,
                            preprocessors=self.config.preprocessors)

    metric_name = self.distance.to_sklearn_metric()
    cache = {}
    y_true, y_pred = [], []

    for trial in getattr(protocol, f"{partition}_trial")():
        # Embed both sides of the trial (cached per file).
        left = self._file_embedding(trial['file1'], embedder, cache)
        right = self._file_embedding(trial['file2'], embedder, cache)

        # Single-row embeddings, so [0, 0] picks the scalar distance.
        y_pred.append(cdist(left, right, metric=metric_name)[0, 0])
        y_true.append(trial['reference'])

    _, _, _, eer = det_curve(np.array(y_true), np.array(y_pred),
                             distances=True)

    # Returning 1-eer because the evaluator keeps track of the highest
    # metric value
    return 1 - eer, y_pred, y_true
# NOTE(review): this is a fragment of a larger script — `row`, `i`, `pbar`,
# `df_test_sub`, `emb`, `expt_root`, `expt_name`, `audio1_filepath` and
# `audio1_embedding` are defined outside the visible chunk. The statements up
# to `pbar.update()` presumably sit inside a loop over `df_test_sub` rows,
# with the pickle dump and CSV export after the loop — confirm against the
# full file before relying on this layout.

# Build the path to the second audio file of the current trial pair.
audio2_filepath = os.path.join(
    expt_root, "Train-Test-Data/public-test/") + str(row["audio_2"])
# Cache-or-compute the embedding: average the per-window embeddings into a
# single (1, dim) row so cdist below receives 2-D input.
if audio2_filepath in filename2embedding:
    audio2_embedding = filename2embedding[audio2_filepath]
else:
    audio2_embedding = np.mean(emb({
        "uri": row["audio_2"],
        "audio": audio2_filepath
    }), axis=0, keepdims=True)
    filename2embedding[audio2_filepath] = audio2_embedding
# X_audio1 = l2_normalize(np.array([audio1_embedding,]))
# X_audio2 = l2_normalize(np.array([audio2_embedding,]))
# Cosine distance between the two cached embeddings for this trial.
distance = cdist(audio1_embedding, audio2_embedding, metric="cosine")
#if (i % 1000) == 0:
#    print(f"Distance is {distance[0][0]} for index {i} ")
dist = distance[0][0]
# Record the score and the resolved file paths on the submission frame.
df_test_sub.loc[i, "dist"] = dist
df_test_sub.loc[i, "audio1_filepath"] = audio1_filepath
df_test_sub.loc[i, "audio2_filepath"] = audio2_filepath
pbar.update()
# Persist the embedding cache so future runs can skip recomputation.
with open(f"{expt_name}embedding_public.pickle", 'wb') as handle:
    pickle.dump(filename2embedding, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Export the scored trials.
df_test_sub.to_csv(f"{expt_name}_df_test_sub.csv", index=False)
def _validate_epoch_verification(
    self,
    epoch,
    validation_data,
    protocol=None,
    subset: Subset = "development",
    device: Optional[torch.device] = None,
    batch_size: int = 32,
    n_jobs: int = 1,
    duration: float = None,
    step: float = 0.25,
    metric: str = None,
    **kwargs,
):
    """Compute the equal error rate of `epoch`'s model on `subset` trials.

    Parameters
    ----------
    epoch : int
        Epoch whose pretrained weights are validated.
    validation_data : unused here, kept for interface compatibility.
    protocol : str
        Name of the verification protocol.
    subset : Subset
        Protocol subset whose `{subset}_trial` generator is iterated.
    device, batch_size, duration, step : passed to `Pretrained`.
    metric : str
        Distance metric name passed to `scipy.spatial.distance.cdist`.

    Returns
    -------
    dict
        ``{"metric": "equal_error_rate", "minimize": True, "value": eer}``.
    """
    # initialize embedding extraction
    pretrained = Pretrained(
        validate_dir=self.validate_dir_,
        epoch=epoch,
        duration=duration,
        step=step,
        batch_size=batch_size,
        device=device,
    )

    # Work on a copy: the original assigned `preprocessors =
    # self.preprocessors_` and then mutated it, leaking the defaults
    # added below into the instance's own preprocessor mapping.
    preprocessors = dict(self.preprocessors_)
    if "audio" not in preprocessors:
        preprocessors["audio"] = FileFinder()
    if "duration" not in preprocessors:
        preprocessors["duration"] = get_audio_duration
    _protocol = get_protocol(protocol, preprocessors=preprocessors)

    cache = {}

    def _embedding(current_file):
        # Compute each file's embedding at most once per validation run,
        # keyed by the file's hash.
        file_hash = self.get_hash(current_file)
        if file_hash not in cache:
            cache[file_hash] = self.get_embedding(current_file, pretrained)
        return cache[file_hash]

    y_true, y_pred = [], []
    for trial in getattr(_protocol, f"{subset}_trial")():
        emb1 = _embedding(trial["file1"])
        emb2 = _embedding(trial["file2"])

        # compare embeddings: single-row inputs, so [0, 0] is the scalar
        distance = cdist(emb1, emb2, metric=metric)[0, 0]
        y_pred.append(distance)
        y_true.append(trial["reference"])

    _, _, _, eer = det_curve(np.array(y_true), np.array(y_pred),
                             distances=True)

    return {
        "metric": "equal_error_rate",
        "minimize": True,
        "value": float(eer),
    }
def _validate_epoch_verification(self, epoch, validation_data,
                                 protocol=None,
                                 subset='development',
                                 device: Optional[torch.device] = None,
                                 batch_size: int = 32,
                                 n_jobs: int = 1,
                                 duration: float = None,
                                 step: float = 0.25,
                                 metric: str = None,
                                 **kwargs):
    """Validate one epoch on a speaker verification protocol.

    Each trial file is embedded at most once (memoized by file hash),
    every trial is scored with the `metric` distance between its two
    embeddings, and the resulting scores yield the equal error rate.

    Returns
    -------
    dict
        ``{'metric': 'equal_error_rate', 'minimize': True, 'value': eer}``.
    """
    # initialize embedding extraction
    pretrained = Pretrained(validate_dir=self.validate_dir_,
                            epoch=epoch,
                            duration=duration,
                            step=step,
                            batch_size=batch_size,
                            device=device)

    _protocol = get_protocol(protocol, progress=False,
                             preprocessors=self.preprocessors_)

    cache = {}

    def embed(current_file):
        # one embedding per unique file, memoized by its hash
        key = self.get_hash(current_file)
        if key not in cache:
            cache[key] = self.get_embedding(current_file, pretrained)
        return cache[key]

    y_true, y_pred = [], []
    for trial in getattr(_protocol, '{0}_trial'.format(subset))():
        # score the trial: single-row embeddings, so [0, 0] is the scalar
        distance = cdist(embed(trial['file1']),
                         embed(trial['file2']),
                         metric=metric)[0, 0]
        y_pred.append(distance)
        y_true.append(trial['reference'])

    _, _, _, eer = det_curve(np.array(y_true), np.array(y_pred),
                             distances=True)

    return {
        'metric': 'equal_error_rate',
        'minimize': True,
        'value': float(eer)
    }