def train(self, training_data, extractor_file):
    with HDF5File(extractor_file, 'w') as f:
        groups = self.get_extractor_groups()
        for e, group in zip(self.processors, groups):
            f.create_group(group)
            f.cd(group)
            self.train_one(e, training_data, f, apply=False)
            f.cd('..')
def fit_adap(data, ubm, save_path):
    print('Generating GMMs...')
    # One 64-component GMM over 60-dimensional features per class,
    # MAP-adapted from the shared UBM (relevance factor 10).
    msa = em.GMMMachine(64, 60)
    egy = em.GMMMachine(64, 60)
    msa_trainer = em.MAP_GMMTrainer(ubm, relevance_factor=10)
    egy_trainer = em.MAP_GMMTrainer(ubm, relevance_factor=10)
    em.train(msa_trainer, msa, data['f'], max_iterations=14, convergence_threshold=0.001)
    em.train(egy_trainer, egy, data['m'], max_iterations=14, convergence_threshold=0.001)
    msa.save(HDF5File(save_path + 'msa.h5', 'w'))
    egy.save(HDF5File(save_path + 'egy.h5', 'w'))
def load(self, extractor_file):
    if not self.requires_training:
        return
    with HDF5File(extractor_file) as f:
        groups = self.get_extractor_groups()
        for e, group in zip(self.processors, groups):
            f.cd(group)
            e.load(f)
            f.cd('..')
def predict_ubm(data, model_path):
    try:
        msa = em.GMMMachine(HDF5File(model_path + 'msa.h5'))
        egy = em.GMMMachine(HDF5File(model_path + 'egy.h5'))
        # ubm = joblib.load(model_path + 'ubm.joblib')
        # msa = joblib.load(model_path + 'msa.joblib')
        # egy = joblib.load(model_path + 'egy.joblib')
    except Exception:
        print("Model not found.")
        exit()
    # Score every utterance against both adapted models. Lists are used
    # instead of lazy map() iterators so the scores survive repeated reads.
    return {
        'm': [(msa.log_likelihood(x), egy.log_likelihood(x)) for x in data['m']],
        'f': [(msa.log_likelihood(x), egy.log_likelihood(x)) for x in data['f']],
    }
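# A minimal sketch (not from the original script) of turning predict_ubm's
# (msa_ll, egy_ll) score pairs into per-utterance decisions. The 0.0
# log-likelihood-ratio threshold and the 'models/' path are assumptions.
scores = predict_ubm(data, 'models/')
for split in ('m', 'f'):
    labels = ['msa' if msa_ll - egy_ll > 0.0 else 'egy'
              for msa_ll, egy_ll in scores[split]]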
def fit_ubm(switch_data, non_switch_data, num_components, save_path):
    print('Generating UBM...')
    # Pool both classes: the UBM models the overall feature distribution.
    all_data = np.concatenate((switch_data, non_switch_data))
    ubm = em.GMMMachine(num_components, all_data.shape[1])
    # Maximum-likelihood training, updating means, variances and weights.
    ubm_trainer = em.ML_GMMTrainer(True, True, True)
    em.train(ubm_trainer, ubm, all_data, max_iterations=12, convergence_threshold=0.001)
    ubm.save(HDF5File(save_path + 'ubm.h5', 'w'))
    return ubm
def train(self, training_data, extractor_file):
    with HDF5File(extractor_file, 'w') as f:
        groups = self.get_extractor_groups()
        for i, (e, group) in enumerate(zip(self.processors, groups)):
            # Feed each trained processor's output into the next one;
            # the last processor's output is not needed.
            apply = i != len(self.processors) - 1
            f.create_group(group)
            f.cd(group)
            training_data = self.train_one(e, training_data, f, apply=apply)
            f.cd('..')
def est(self):
    mix_file = util.cache_path(self.ubm)
    ivecdim = self.dim
    # Collect the per-task Gaussian statistics accumulated by acc().
    gslist = []
    for idx, gfile in self.single_accu_caches.items():
        gs = GMMStats(HDF5File(tk.uncached_path(gfile)))
        gslist.append(gs)
    gmm = sc.MixtureSet(mix_file)
    ubm = convert_gmm(gmm)
    ivm = IVectorMachine(ubm, ivecdim)
    ivm.variance_threshold = 1e-5
    # Estimate the total-variability (T) matrix with EM.
    ivtrainer = IVectorTrainer(update_sigma=True)
    ivtrainer.initialize(ivm, gslist)
    for i in range(self.iter):
        ivtrainer.e_step(ivm, gslist)
        ivtrainer.m_step(ivm)
    ivm.save(HDF5File(self.t_matrix.get_path(), "w"))
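# A minimal usage sketch, not part of the job above: given the UBM (`ubm`),
# the i-vector dimension (`ivecdim`) and accumulated per-utterance statistics
# (`gs`, a GMMStats object), project to an i-vector with the trained T-matrix.
# The path 't_matrix.h5' is a placeholder assumption.
ivm = IVectorMachine(ubm, ivecdim)
ivm.load(HDF5File('t_matrix.h5'))
ivector = ivm.project(gs)                    # 1-D array of length ivecdim
ivector = ivector / np.linalg.norm(ivector)  # optional length normalization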
def fit_ubm(data, save_path):
    print('Generating UBM...')
    # Pool both splits for the universal background model.
    all_data = np.concatenate((data['m'], data['f']))
    ubm = em.GMMMachine(64, 60)  # 64 components, 60-dimensional features
    ubm_trainer = em.ML_GMMTrainer(True, True, True)
    em.train(ubm_trainer, ubm, all_data, max_iterations=12, convergence_threshold=0.001)
    ubm.save(HDF5File(save_path + 'ubm.h5', 'w'))
    # ubm = GaussianMixture(128, covariance_type='diag', init_params='random',
    #                       warm_start=True, max_iter=12, verbose=1).fit(all_data)
    # joblib.dump(ubm, save_path + 'ubm.joblib')
    return ubm
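# A minimal end-to-end sketch (assumptions: `data` is a dict with 'm' and 'f'
# arrays of 60-dimensional frames, 'models/' is a writable directory) chaining
# the three functions above: train the UBM, MAP-adapt the class models, score.
ubm = fit_ubm(data, 'models/')
fit_adap(data, ubm, 'models/')
scores = predict_ubm(data, 'models/')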
def fit_adap(switch_data, non_switch_data, ubm, num_components, save_path):
    print('Generating GMMs...')
    switch_gmm = em.GMMMachine(num_components, switch_data.shape[1])
    # non_switch_gmm = em.GMMMachine(num_components, switch_data.shape[1])
    # MAP-adapt only the means from the UBM; variances and weights stay fixed.
    switch_trainer = em.MAP_GMMTrainer(ubm, relevance_factor=10,
                                       update_variances=False, update_weights=False)
    # non_switch_trainer = em.MAP_GMMTrainer(ubm, relevance_factor=10,
    #                                        update_variances=False, update_weights=False)
    em.train(switch_trainer, switch_gmm, switch_data, max_iterations=14,
             convergence_threshold=0.001)
    # em.train(non_switch_trainer, non_switch_gmm, non_switch_data, max_iterations=14,
    #          convergence_threshold=0.001)
    switch_gmm.save(HDF5File(save_path + 'switch.h5', 'w'))
    # non_switch_gmm.save(HDF5File(save_path + 'non_switch.h5', 'w'))
    return switch_gmm
def test_user_allocation():
    # Tests the correctness of a linear machine
    c = HDF5File(MACHINE)
    m = Machine(c)

    def presumed(ivalue):
        """Calculates, by hand, the presumed output given the input"""
        # These are the supposed preloaded values from the file "MACHINE"
        isub = numpy.array([0., 0.5, 0.5], 'float64')
        idiv = numpy.array([0.5, 1.0, 1.0], 'float64')
        w = numpy.array([[0.4, 0.4, 0.2], [0.1, 0.2, 0.7]], 'float64')
        b = numpy.array([0.3, -3.0], 'float64')
        act = math.tanh
        return numpy.array([
            act((w[i, :] * ((ivalue - isub) / idiv)).sum() + b[i])
            for i in range(w.shape[0])
        ], 'float64')

    testing = [
        [1, 1, 1],
        [0.5, 0.2, 200],
        [-27, 35.77, 0],
        [12, 0, 0],
    ]

    # 1D case
    maxerr = numpy.ndarray((2,), 'float64')
    maxerr.fill(1e-10)
    output = numpy.ndarray((2,), 'float64')
    for k in testing:
        input = numpy.array(k, 'float64')
        m(input, output)
        assert (abs(presumed(input) - output) < maxerr).all()

    # 2D case
    output = numpy.ndarray((len(testing), 2), 'float64')
    m(testing, output)
    for i, k in enumerate(testing):
        input = numpy.array(k, 'float64')
        assert (abs(presumed(input) - output[i, :]) < maxerr).all()
def test_initialization():
    # Two inputs and 1 output
    m = Machine(2, 1)
    assert (m.weights == 0.0).all()
    nose.tools.eq_(m.weights.shape, (2, 1))
    assert (m.biases == 0.0).all()
    nose.tools.eq_(m.biases.shape, (1,))

    # Start by providing the data
    w = numpy.array([[0.4, 0.1], [0.4, 0.2], [0.2, 0.7]], 'float64')
    m = Machine(w)
    b = numpy.array([0.3, -3.0], 'float64')
    isub = numpy.array([0., 0.5, 0.5], 'float64')
    idiv = numpy.array([0.5, 1.0, 1.0], 'float64')
    m.input_subtract = isub
    m.input_divide = idiv
    m.biases = b
    m.activation = HyperbolicTangent()

    assert (m.input_subtract == isub).all()
    assert (m.input_divide == idiv).all()
    assert (m.weights == w).all()
    assert (m.biases == b).all()
    nose.tools.eq_(m.activation, HyperbolicTangent())

    # Save to file
    # c = HDF5File("bla.hdf5", 'w')
    # m.save(c)

    # Start by reading data from a file
    c = HDF5File(MACHINE)
    m = Machine(c)
    assert (m.weights == w).all()
    assert (m.biases == b).all()

    # Makes sure we cannot stuff incompatible data
    w = numpy.array([[0.4, 0.4, 0.2], [0.1, 0.2, 0.7]], 'float64')
    m = Machine(w)
    b = numpy.array([0.3, -3.0, 2.7, -18, 52], 'float64')  # wrong length
    nose.tools.assert_raises(RuntimeError, setattr, m, 'biases', b)
    nose.tools.assert_raises(RuntimeError, setattr, m, 'input_subtract', b)
    nose.tools.assert_raises(RuntimeError, setattr, m, 'input_divide', b)
def forward(self, task_id):
    mixfile = util.cache_path(self.ubm)
    ivmfile = tk.uncached_path(self.t_matrix)
    alignfile = util.cache_path(self.alignment[task_id])
    allofile = tk.uncached_path(self.allophones)
    alloignore = self.allophones_to_ignore
    featfile = util.cache_path(self.features[task_id])
    ivecdim = self.dim
    lengthnorm = bool(self.length_norm)

    # Rebuild the i-vector extractor from the UBM and the trained T-matrix.
    gmm = sc.MixtureSet(mixfile)
    ubm = convert_gmm(gmm)
    ivm = IVectorMachine(ubm, ivecdim)
    ivm.load(HDF5File(ivmfile))

    tmp_ivec_file = tempfile.mktemp(suffix=".ivec")
    out = sc.FileArchive(tmp_ivec_file)

    logging.info(
        "Opening alignment cache '%s' with allophones from '%s'; ignoring '%s'"
        % (alignfile, allofile, ",".join(alloignore)))
    aligncache = sc.FileArchive(alignfile)
    aligncache.setAllophones(allofile)

    cache = sc.FileArchive(featfile)
    cur_rec = ""
    tmp_feat = None
    tmp_segs = []
    for a in sorted(cache.ft.keys()):
        if a.endswith(".attribs"):
            continue
        logging.info("Reading '%s'..." % a)
        ncorpus, nrec, nseg = a.split("/")
        try:
            time, data = cache.read(a, "feat")
            align = aligncache.read(a, "align")
            # Keep only frames whose aligned allophone is not on the ignore list.
            allos = list(aligncache.allophones[i] for (t, i, s, w) in align)
            T = len(list(filter(lambda al: al not in alloignore, allos)))
            feat = np.ndarray((T, len(data[0])))
            k = 0
            for t in range(len(data)):
                (_, allo, state, weight) = align[t]
                if aligncache.allophones[allo] not in alloignore:
                    feat[k, :] = data[t]
                    k += 1
        except Exception as e:
            _, _, exc_tb = sys.exc_info()
            logging.error("failed: %s (line %d)", e, exc_tb.tb_lineno)
            # Fall back to a zero i-vector so the segment is still covered.
            ivector = np.zeros([1, ivecdim])
            out.addFeatureCache(a, [ivector], [[0.0, 999999.0]])
            continue

        if nrec == cur_rec:
            # Same recording: keep accumulating frames across segments.
            tmp_feat = np.concatenate((tmp_feat, feat), axis=0)
            tmp_segs.append(a)
        else:
            if cur_rec != "":
                # New recording: extract one i-vector for the finished one
                # and assign it to all of its segments.
                gs_test = GMMStats(gmm.nMeans, gmm.dim)
                ivm.ubm.acc_statistics(tmp_feat, gs_test)
                ivector = ivm.project(gs_test)
                if lengthnorm:
                    ivector = ivector / np.linalg.norm(ivector)
                ivector = np.expand_dims(ivector, 0)
                for seg in tmp_segs:
                    out.addFeatureCache(seg, [ivector], [[0.0, 999999.0]])
            tmp_feat = feat
            tmp_segs = [a]
            cur_rec = nrec

    # Last recording.
    gs_test = GMMStats(gmm.nMeans, gmm.dim)
    ivm.ubm.acc_statistics(tmp_feat, gs_test)
    ivector = ivm.project(gs_test)
    if lengthnorm:
        ivector = ivector / np.linalg.norm(ivector)
    ivector = np.expand_dims(ivector, 0)
    for seg in tmp_segs:
        out.addFeatureCache(seg, [ivector], [[0.0, 999999.0]])

    out.finalize()
    del out  # delete this to close the file handle. This ensures all data is written.
    shutil.move(tmp_ivec_file, self.single_ivec_caches[task_id].get_path())
def acc(self, task_id):
    mix_file = util.cache_path(self.ubm)
    align_file = util.cache_path(self.alignment[task_id])
    feat_file = util.cache_path(self.features[task_id])
    allo_file = util.cache_path(self.allophones)

    logging.info("Reading mixture file from '%s'..." % mix_file)
    gmm = sc.MixtureSet(mix_file)
    logging.info("Read %d means and %d covariances of dimension %d" %
                 (gmm.nMeans, gmm.nCovs, gmm.dim))
    ubm = convert_gmm(gmm)
    ivm = IVectorMachine(ubm, self.dim)
    ivm.variance_threshold = 1e-5
    gs = GMMStats(gmm.nMeans, gmm.dim)

    logging.info(
        "Opening alignment cache '%s' with allophones from '%s'; ignoring '%s'"
        % (align_file, allo_file, ",".join(self.allophones_to_ignore)))
    aligncache = sc.FileArchive(align_file)
    aligncache.setAllophones(allo_file)

    cache = sc.FileArchive(feat_file)
    for a in cache.ft.keys():
        if a.endswith(".attribs"):
            continue
        logging.info("Reading '%s'..." % a)
        time, data = cache.read(a, "feat")
        align = aligncache.read(a, "align")
        if len(align) < 1:
            logging.warning("No data for segment: '%s' in alignment." % a)
            continue
        # Keep only frames whose aligned allophone is not on the ignore list.
        allos = list(aligncache.allophones[i] for (t, i, s, w) in align)
        T = len(list(filter(lambda al: al not in self.allophones_to_ignore, allos)))
        feat = np.ndarray((T, len(data[0])))
        k = 0
        for t in range(len(data)):
            (_, allo, state, weight) = align[t]
            if aligncache.allophones[allo] not in self.allophones_to_ignore:
                feat[k, :] = data[t]
                k += 1
        # Accumulate the segment's sufficient statistics against the UBM.
        ivm.ubm.acc_statistics(feat, gs)

    logging.info("Writing Gaussian statistics to '%s'" %
                 self.single_accu_caches[task_id].get_path())
    gs.save(HDF5File(self.single_accu_caches[task_id].get_path(), "w"))
                    help='Path to load model from, must end in .joblib')
args = parser.parse_args()

if __name__ == '__main__':
    if args.extract:
        data.extract_with_test_utterance(args.window * 16, args.num_features)
    if args.train:
        switch_data, non_switch_data = data.load(args.window, args.num_features)
        ubm = classify.fit_ubm(switch_data, non_switch_data,
                               args.num_components, args.save_path)
        switch_gmm = classify.fit_adap(switch_data, non_switch_data, ubm,
                                       args.num_components, args.save_path)
    if args.test:
        test_data = data.load_qual_test()
        # switch_data, non_switch_data = data.load(args.window, args.num_features, test=True)
        if not args.train:
            try:
                switch_gmm = em.GMMMachine(HDF5File(args.load_path + 'switch.h5'))
                # non_switch_gmm = em.GMMMachine(HDF5File(args.load_path + 'non_switch.h5'))
            except Exception:
                print("Models not found.")
                exit()
        ll = classify.switch_ll(test_data, switch_gmm)
        evaluate.qual_test(ll, 1)
        # scores = classify.predict(switch_data, non_switch_data, switch_gmm, non_switch_gmm)
        # Y, Y_pred = evaluate.get_predictions(scores)
        # evaluate.evaluate(Y, Y_pred)
        # evaluate.confusion_matrix(Y, Y_pred)
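# Example invocation (flag spellings inferred from the args.* attributes used
# above; the values are illustrative assumptions, not tuned settings):
#   python main.py --extract --train --test --window 25 --num_features 60 \
#       --num_components 64 --save_path models/ --load_path models/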