def run(args):
    """
    Average embeddings into one vector per speaker and write the result.

    The mode is selected by args.spk2utt:
      - not given: every entry read from args.npy_scp must be a 2D array.
        Its rows are averaged (after per-row L2 normalisation when
        args.normalize is set) and written under the entry's own key.
      - given: for each speaker in the map, the listed utterances are looked
        up in args.npy_scp, each must be a 1D vector, and their mean (after
        optional L2 normalisation of each vector) is written under the
        speaker id.

    Args:
        args: namespace providing npy_scp, spk2utt, dump_dir, scp, normalize

    Raises:
        RuntimeError: if an input array does not have the expected rank
    """
    reader = NumpyReader(args.npy_scp)
    # num_tokens=-1: presumably allows a variable number of utterance ids
    # per line -- confirm against parse_scps
    speaker_map = None
    if args.spk2utt:
        speaker_map = parse_scps(args.spk2utt, num_tokens=-1)

    with NumpyWriter(args.dump_dir, args.scp) as writer:
        if speaker_map is not None:
            # One mean vector per speaker, gathered from its utterances
            for speaker, utterances in speaker_map.items():
                collected = []
                for utt in utterances:
                    embedding = reader[utt]
                    if embedding.ndim != 1:
                        raise RuntimeError(
                            "--spk2utt is not None, expect input as vector, got {:d}"
                            .format(embedding.ndim))
                    if args.normalize:
                        embedding = embedding / np.linalg.norm(embedding)
                    collected.append(embedding)
                stacked = np.stack(collected)
                writer.write(speaker, np.mean(stacked, axis=0))
            logger.info("Processed {:d} speakers".format(len(speaker_map)))
        else:
            # Each entry already holds all vectors of one speaker as rows
            for name, matrix in reader:
                if matrix.ndim != 2:
                    raise RuntimeError(
                        "--spk2utt is None, so input ndarray must be 2D, got {:d}"
                        .format(matrix.ndim))
                if args.normalize:
                    row_norms = np.linalg.norm(
                        matrix, ord=2, axis=1, keepdims=True)
                    matrix = matrix / row_norms
                writer.write(name, np.mean(matrix, axis=0))
            logger.info("Processed {:d} speakers".format(len(reader)))
def __init__(self, data_dir):
    """
    Open the feature script and speaker map located under data_dir.

    Args:
        data_dir: directory expected to contain "feats.scp" and "spk2utt"

    Raises:
        RuntimeError: if either of the two files does not exist
    """
    feats_path = os.path.join(data_dir, "feats.scp")
    spk2utt_path = os.path.join(data_dir, "spk2utt")
    # Check feats.scp first, then spk2utt; fail on the first one missing
    for required in (feats_path, spk2utt_path):
        if not os.path.exists(required):
            raise RuntimeError("Missing {}!".format(required))
    self.reader = ScriptReader(feats_path)
    # num_tokens=-1: presumably allows a variable number of utterance ids
    # per line -- confirm against parse_scps
    self.spk2utt = parse_scps(spk2utt_path, num_tokens=-1)
def run(args):
    """
    Score every evaluation utterance against every enrolled speaker.

    Enrollment embeddings are read from args.spks_scp and stacked into a
    matrix with one row per speaker. Each embedding from args.eval_scp is
    scored against all rows with a dot product, and one line
    "<score> target|nontarget" is printed per (utterance, speaker) pair.
    A pair is "target" when the enrolled speaker id equals the utterance's
    speaker according to args.utt2spk.

    Args:
        args: namespace providing utt2spk, spks_scp, eval_scp, type
              ("numpy" selects NumpyReader, anything else ScriptReader)
              and normalize (L2-normalise both sides before scoring)

    Raises:
        RuntimeError: if an utterance's speaker is not in the enrollment set
    """
    utt2spk = parse_scps(args.utt2spk)

    def Reader(scp, t):
        return NumpyReader(scp) if t == "numpy" else ScriptReader(scp)

    spks_reader = Reader(args.spks_scp, args.type)
    spks_keys, spks_embs = [], []
    for spkid, spkvec in spks_reader:
        spks_keys.append(spkid)
        spks_embs.append(spkvec)
    # N x D, row order matches spks_keys
    spks_mat = np.stack(spks_embs)
    if args.normalize:
        # Divide each row by its L2 norm. The norms alone form an N x 1
        # array, which must not replace the embeddings themselves.
        spks_mat = spks_mat / np.linalg.norm(
            spks_mat, axis=1, ord=2, keepdims=True)
    logger.info("Load {:d} speakers from enrollment embeddings".format(
        len(spks_keys)))

    # Set for constant-time membership tests; spks_keys keeps the row order
    enrolled = set(spks_keys)
    eval_reader = Reader(args.eval_scp, args.type)
    for uttid, uttvec in eval_reader:
        spkid = utt2spk[uttid]
        if spkid not in enrolled:
            raise RuntimeError(
                "Seems speaker {} do not exist in enrollment set".format(
                    spkid))
        if args.normalize:
            uttvec = uttvec / np.linalg.norm(uttvec)
        # Dot product against every enrolled speaker; this is the cosine
        # similarity when both sides were normalized above.
        score_mat = uttvec @ np.transpose(spks_mat)
        for cmpid, score in zip(spks_keys, score_mat):
            print("{:.2f} {}".format(
                score, "target" if cmpid == spkid else "nontarget"))
    logger.info("Compute scores for {:d} utterances done".format(
        len(eval_reader)))
def __init__(self, spk2class=None):
    """
    Set up empty per-key accumulators and an optional speaker-to-class map.

    Args:
        spk2class: path handed to parse_scps to load the mapping; when
                   falsy (default None) no mapping is loaded
    """
    if spk2class:
        self.s2c = parse_scps(spk2class)
    else:
        self.s2c = None
    # Missing keys start at 0.0 / 0 respectively
    self.snr = defaultdict(float)
    self.cnt = defaultdict(int)
def __init__(self, spk2gender=None):
    """
    Set up empty per-key accumulators and an optional speaker-to-gender map.

    Args:
        spk2gender: path handed to parse_scps to load the mapping; when
                    falsy (default None) no mapping is loaded
    """
    if spk2gender:
        self.s2g = parse_scps(spk2gender)
    else:
        self.s2g = None
    # Missing keys start at 0.0 / 0 respectively
    self.snr = defaultdict(float)
    self.cnt = defaultdict(int)