def transform(self, X, indices=None, sad=None,
              save_ivecs=False, keep_stats=False, name=None):
  """
  Parameters
  ----------
  X : ndarray
    Input data [n_samples, n_features] to be transformed
  indices : {Mapping, tuple, list}
    in case the data is given by a list of files, `indices` acts as a
    file indicator mapping 'file_name' -> (start_index_in_X, end_index_in_X).
    This mapping can be provided as a dictionary or a list of tuples.
  sad : ndarray
    inspired by "Speech Activity Detection" (SAD) indexing, this array
    indicates which samples are used; the shape should be
    [n_samples,] or [n_samples, 1]
  save_ivecs : bool
    if True, save the extracted i-vectors to disk at path `ivec_[name]`;
    if False, return the i-vectors directly without saving
  keep_stats : bool
    if True, keep the zero and first order statistics. The first order
    statistics could consume a huge amount of disk space; otherwise,
    they are deleted after training
  name : {None, str}
    identity of the i-vectors (for re-use in the future).
    If None, a random name is used
  """
  if not self.is_fitted:
    raise ValueError(
        "Ivector has not been fitted, call Ivector.fit(...) first")
  n_files = X.shape[0] if indices is None else len(indices)
  if name is None:
    name = uuid(length=8)
  else:
    name = str(name)
  # ====== init ====== #
  z_path = self.get_z_path(name)
  f_path = self.get_f_path(name)
  if save_ivecs:
    i_path = self.get_i_path(name)
  else:
    i_path = None
  name_path = self.get_name_path(name)
  # ====== check if the i-vector file already exists ====== #
  if i_path is not None and os.path.exists(i_path):
    ivec = MmapArray(path=i_path)
    assert ivec.shape[0] == n_files and ivec.shape[1] == self.tv_dim, \
        "Need i-vectors for %d files, found existing data at path:'%s' with shape:%s" % \
        (n_files, i_path, ivec.shape)
    return ivec
  # ====== extract Z and F (zero and first order statistics) ====== #
  if os.path.exists(z_path) and os.path.exists(f_path):
    pass
  else:
    # remove any partial or stale files before re-extracting
    if os.path.exists(z_path):
      os.remove(z_path)
    if os.path.exists(f_path):
      os.remove(f_path)
    if os.path.exists(name_path):
      os.remove(name_path)
    _extract_zero_and_first_stats(X=X, sad=sad, indices=indices,
                                  gmm=self.gmm,
                                  z_path=z_path, f_path=f_path,
                                  name_path=name_path)
  Z = MmapArray(path=z_path)
  F = MmapArray(path=f_path)
  # ====== extract i-vectors ====== #
  ivec = self.tmat.transform_to_disk(path=i_path, Z=Z, F=F, dtype='float32')
  # ====== clean up ====== #
  Z.close()
  F.close()
  if not keep_stats:
    if os.path.exists(z_path):
      os.remove(z_path)
    if os.path.exists(f_path):
      os.remove(f_path)
  else:
    print("Zero-order stats saved at:", ctext(z_path, 'cyan'))
    print("First-order stats saved at:", ctext(f_path, 'cyan'))
  return ivec
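# A minimal usage sketch for `transform` on held-out data. The instance name,
# data arrays, utterance names and index ranges below are illustrative
# assumptions, not part of this module:
#
#   ivector = ...  # an Ivector instance that has already been fitted
#   # map each utterance name to its (start, end) row range in X_test
#   test_indices = {'utt_001': (0, 312), 'utt_002': (312, 748)}
#   ivecs = ivector.transform(X_test, indices=test_indices, sad=sad_test,
#                             save_ivecs=True, keep_stats=False,
#                             name='test_set')
#   # `ivecs` is a memory-mapped array of shape [n_files, tv_dim]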
def fit(self, X, indices=None, sad=None,
        refit_gmm=False, refit_tmat=False,
        extract_ivecs=False, keep_stats=False):
  """
  Parameters
  ----------
  X : ndarray
    Training data [n_samples, n_features]
  indices : {Mapping, tuple, list}
    in case the data is given by a list of files, `indices` acts as a
    file indicator mapping 'file_name' -> (start_index_in_X, end_index_in_X).
    This mapping can be provided as a dictionary or a list of tuples.
    Note: the order provided in `indices` will be preserved
  sad : ndarray
    inspired by "Speech Activity Detection" (SAD) indexing, this array
    indicates which samples are used for training; the shape should be
    [n_samples,] or [n_samples, 1]
  refit_gmm : bool
    if True, re-fit the GMM even if it is already fitted; consequently,
    the T-matrix will be re-fitted as well
  refit_tmat : bool
    if True, re-fit the T-matrix even if it is already fitted
  extract_ivecs : bool
    if True, extract the i-vectors for the training data
  keep_stats : bool
    if True, keep the zero and first order statistics. The first order
    statistics could consume a huge amount of disk space; otherwise,
    they are deleted after training
  """
  new_gmm = (not self.gmm.is_fitted or refit_gmm)
  # ====== clean up corrupted (empty) files ====== #
  if os.path.exists(self.z_path):
    Z = MmapArray(self.z_path)
    if Z.shape[0] == 0:  # empty file
      os.remove(self.z_path)
    Z.close()
  if os.path.exists(self.f_path):
    F = MmapArray(self.f_path)
    if F.shape[0] == 0:  # empty file
      os.remove(self.f_path)
    F.close()
  if os.path.exists(self.ivec_path):
    ivec = MmapArray(self.ivec_path)
    if ivec.shape[0] == 0:  # empty file
      os.remove(self.ivec_path)
    ivec.close()
  # ====== train the GMM first ====== #
  if new_gmm:
    input_data = [X]
    if sad is not None:
      input_data.append(sad)
    if indices is not None:
      input_data.append(indices)
    self.gmm.fit(input_data)
  # ====== decide which parts need (re-)training ====== #
  # the GMM must be fitted before creating the T-matrix model
  new_tmat = (not self.tmat.is_fitted or new_gmm or refit_tmat)
  # new i-vectors are extracted only when:
  # - `extract_ivecs=True`, and
  # - a new T-matrix was trained, or no i-vectors exist on disk yet
  new_ivec = extract_ivecs and \
      (new_tmat or not os.path.exists(self.ivec_path))
  # new statistics are only needed when:
  # - the GMM was updated, or
  # - a new T-matrix or new i-vectors are required and the Z and F
  #   statistics do not exist yet
  if not new_gmm and \
      (os.path.exists(self.z_path) and os.path.exists(self.f_path)):
    new_stats = False
  else:
    new_stats = new_gmm or new_tmat or new_ivec
  # ====== extract the statistics ====== #
  if new_stats:
    _extract_zero_and_first_stats(X=X, sad=sad, indices=indices,
                                  gmm=self.gmm,
                                  z_path=self.z_path, f_path=self.f_path,
                                  name_path=self.name_path)
  # ====== train the T-matrix and extract i-vectors ====== #
  if new_tmat or new_ivec:
    Z = MmapArray(path=self.z_path)
    F = MmapArray(path=self.f_path)
    if new_tmat:
      self.tmat.fit((Z, F))
    if new_ivec:
      self.tmat.transform_to_disk(path=self.ivec_path, Z=Z, F=F,
                                  dtype='float32', device='gpu',
                                  override=True)
    Z.close()
    F.close()
  # ====== clean up ====== #
  if not keep_stats:
    if os.path.exists(self.z_path):
      os.remove(self.z_path)
    if os.path.exists(self.f_path):
      os.remove(self.f_path)
  return self
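# A minimal end-to-end training sketch. The constructor arguments and data
# names below are assumptions for illustration only; consult the class
# constructor for the actual signature:
#
#   ivector = Ivector(path='/tmp/ivec_model', nmix=256, tv_dim=400)
#   ivector.fit(X_train, indices=train_indices, sad=sad_train,
#               extract_ivecs=True, keep_stats=False)
#   # i-vectors for the training data (when `extract_ivecs=True`) are
#   # written to `ivector.ivec_path`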