def __flat_start(self, label_data):
    """
    Flat start (deprecated): estimate a single global mean/covariance and
    copy it into every emitting state of every unit's HMM.

    :param label_data: dict mapping comma-separated unit labels to
        (feature_sequences, frame_counts) pairs covering all training data
    :return: None
    """
    data = list(label_data.values())
    # Concatenate every feature sequence of every utterance into one
    # (total_frames, vector_size) matrix. Collect chunks and concatenate
    # once — the original repeated np.append was quadratic. The unused
    # frame-count accumulator (`size`) was removed.
    frames = []
    for index in range(len(data)):
        frames.extend(data[index][0])
    _data = np.concatenate(frames, axis=0)
    label = list(label_data.keys())
    _label = []
    for l in label:
        _label.extend(l.split(','))
    # Deduplicate the base units.
    label = list(set(_label))
    cluster = Clustering.ClusterInitialization(_data, self.__mix_level,
                                               self.__vector_size, self.log)
    mean, covariance, alpha, clustered_data = cluster.kmeans(algorithm=1)
    # Train a single global GMM.
    tmp_gmm = Clustering.GMM(None, self.__vector_size, self.__mix_level)
    # BUG FIX: train on the concatenated frame matrix `_data`, not on the
    # raw list of (sequences, counts) pairs (`data`), which is what the
    # clustering step above consumed.
    tmp_gmm.data = _data
    tmp_gmm.mean = mean
    tmp_gmm.covariance = covariance
    tmp_gmm.alpha = alpha
    # GMM Baum-Welch iteration.
    tmp_gmm.baulm_welch()
    # Fetch the refined mean, covariance and mixture weights.
    mean = tmp_gmm.mean
    covariance = tmp_gmm.covariance
    alpha = tmp_gmm.alpha
    # Copy the global parameters into every emitting state of every unit.
    for i in range(len(label)):
        hmm = self.__unit[label[i]][1]
        for j in range(1, len(hmm.profunction) - 1):
            # Skip the first and last virtual (non-emitting) states.
            gmm = hmm.profunction[j]
            gmm.mean = mean
            gmm.covariance = covariance
            gmm.alpha = alpha
def __flat_start(self, path_list, file_count, proportion=0.25, step=1,
                 differentiation=True, coefficient=1.):
    """
    Flat start: estimate a global mean and covariance from a subset of the
    training audio and use them to initialise every loaded unit's HMM states.

    :param path_list: list of data entries; each entry's first element is
        an audio file path
    :param file_count: total number of data files
    :param proportion: fraction of the training data used to compute the
        global mean and covariance
    :param step: stride used when subsampling frames from each file
    :param differentiation: perturb each GMM mixture component's
        parameters so the components start differentiated
    :param coefficient: degree of differentiation, in [0, 1]
    :return: None
    """
    self.log.note('flat starting...', cls='i')
    p_file_count = int(file_count * proportion)
    # Subsample every `step`-th frame of each selected file. Collect the
    # chunks and concatenate once — repeated np.append inside the loop
    # was O(n^2) in the total number of frames.
    chunks = [self.__load_audio(path_list[0][0])[::step]]
    for index in range(1, p_file_count):
        data = self.__load_audio(path_list[index][0])  # load audio data
        chunks.append(data[::step])
    p_data = np.concatenate(chunks, axis=0)
    # Single-cluster k-means yields the global mean and covariance.
    cluster = Clustering.ClusterInitialization(p_data, 1,
                                               self.__vector_size, self.log)
    mean, covariance, alpha, clustered_data = cluster.kmeans(
        algorithm=1, cov_matrix=True)
    covariance_diagonal = covariance[0].diagonal()
    units = self.__loaded_units
    diff_coefficient = np.zeros((self.__mix_level, 1))
    if differentiation:
        # Offset each mixture component's mean by a random (positive or
        # negative) fraction of the global standard deviation so the
        # components do not start identical.
        assert 0 <= coefficient <= 1, '差异化系数不满足区间[0,1]'
        diff_coefficient = (np.random.random((self.__mix_level, 1))
                            - np.random.random(
                                (self.__mix_level, 1))) * coefficient
    for unit in units:
        hmm = self.init_unit(unit, new_log=True)
        gmms = hmm.profunction[1:-1]  # skip the two virtual states
        for g in gmms:
            g.mean = (mean.repeat(self.__mix_level, axis=0)
                      + diff_coefficient * covariance_diagonal)
            g.covariance = covariance.repeat(self.__mix_level, axis=0)
        self.__save_parameter(unit, hmm)
    self.delete_trainInfo()
def __cal_gmm(self, unit, unit_data, init=False, smem=False, show_q=False,
              c_covariance=1e-3):
    """
    Train the GMM of every emitting state of one unit's HMM.

    :param unit: current base unit (key into the unit table)
    :param unit_data: per-state training data, indexed by emitting state
    :param init: force (re-)initialisation by clustering
    :param smem: run the SMEM algorithm during Baum-Welch
    :param show_q: log the current likelihood
    :param c_covariance: correction value guarding against singular
        covariance matrices in the GMM
    :return: None
    """
    hmm = self.__unit[unit]
    total = len(hmm.profunction) - 2  # number of emitting states / GMMs
    # profunction[0] and profunction[-1] are virtual (non-emitting)
    # states and carry no GMM to train.
    for state_idx, gmm in enumerate(hmm.profunction[1:-1], start=1):
        gmm.log.note('正在训练GMM%d,共 %d GMM,混合度为 %d'
                     % (state_idx, total, self.__mix_level), cls='i')
        state_data = unit_data[state_idx - 1]
        gmm.add_data(state_data)
        if len(state_data) < self.__mix_level:
            gmm.log.note('数据过少,忽略该组数据', cls='w')
            continue
        if init or gmm.mixture != self.__mix_level:
            # Re-cluster when initialising, or when the requested mixture
            # level no longer matches the model's.
            cluster = Clustering.ClusterInitialization(
                state_data, self.__mix_level, self.__vector_size, gmm.log)
            mean, covariance, alpha, _ = cluster.kmeans(
                algorithm=1, cov_matrix=True)
            gmm.mixture = self.__mix_level
            gmm.mean, gmm.covariance, gmm.alpha = mean, covariance, alpha
        # GMM Baum-Welch iteration.
        gmm.baulm_welch(show_q=show_q, smem=smem, c_covariance=c_covariance)
        gmm.clear_data()  # release the training data held by the GMM
def __cal_gmm(self, hmm, unit_data, init=False, smem=False, show_q=False,
              c_covariance=1e-3):
    """
    Train the GMM of every emitting state of the given HMM.

    :param hmm: HMM instance supplied by the caller
    :param unit_data: per-state training data, indexed by emitting state
    :param init: force (re-)initialisation by clustering
    :param smem: run the SMEM algorithm during EM
    :param show_q: log the current likelihood
    :param c_covariance: correction value guarding against covariance
        values that are too small
    :return: None
    """
    # States 0 and self.__state_num - 1 are virtual (non-emitting) and
    # hold no GMM, hence the trimmed range.
    for state in range(1, self.__state_num - 1):
        gmm = hmm.profunction[state]
        state_data = unit_data[state - 1]  # data for this state's GMM
        gmm.add_data(state_data)
        if len(state_data) < self.__mix_level:
            gmm.log.note('数据过少,忽略该组数据', cls='w')
            continue
        if init or gmm.mixture != self.__mix_level:
            # Re-cluster when initialising, or when the requested mixture
            # level no longer matches the model's.
            cluster = Clustering.ClusterInitialization(
                state_data, self.__mix_level, self.__vector_size, gmm.log)
            mean, covariance, alpha, _ = cluster.kmeans(
                algorithm=1, cov_matrix=True)
            gmm.mixture = self.__mix_level
            gmm.mean, gmm.covariance, gmm.alpha = mean, covariance, alpha
        # GMM EM iteration.
        gmm.em(show_q=show_q, smem=smem, c_covariance=c_covariance)
        gmm.clear_data()  # release the training data held by the GMM
def __cal_gmm(self, label, c=True):
    """
    Initialise / re-train the GMM of every emitting state of one unit.

    :param label: unit label used to look up the HMM
    :param c: when True, re-cluster the state's data and reset the GMM
        parameters before running Baum-Welch (with SMEM); when False,
        only run Baum-Welch on the existing parameters
    :return: None
    """
    hmm = self.__unit[label][1]
    # Skip the first and last virtual (non-emitting) states.
    for gmm in hmm.profunction[1:-1]:
        state_data = gmm.data
        if len(state_data) < self.__mix_level:
            # Too little data for this state — leave its GMM untouched.
            continue
        if not c:
            gmm.baulm_welch(show_q=True)
        else:
            # Re-cluster and reset the mixture parameters.
            cluster = Clustering.ClusterInitialization(
                state_data, self.__mix_level, self.__vector_size)
            mean, sigma, weights, _ = cluster.kmeans(algorithm=1)
            gmm.set_k(self.__mix_level)
            gmm.set_μ(mean)
            gmm.set_sigma(σ=sigma)
            gmm.set_alpha(weights)
            # GMM Baum-Welch iteration with SMEM enabled.
            gmm.baulm_welch(show_q=True, smem=True)
        gmm.clear_data()  # release the training data held by the GMM