Example #1
    def __flat_start(self, label_data):
        """
        均一起步
        (停止使用)
        计算全局均值和方差
        :param label_data: 所有数据
        :return:
        """
        '''data为所有数据的合集'''
        data = list(label_data.values())
        _data = None
        size = 0
        for index in range(len(data)):
            size += sum(data[index][1])
            tmp_data = data[index][0][0]
            for d in range(1, len(data[index][0])):
                tmp_data = np.append(tmp_data, data[index][0][d], axis=0)
            if _data is None:
                _data = tmp_data
            else:
                _data = np.append(_data, tmp_data, axis=0)
        label = list(label_data.keys())
        _label = []
        for l in label:
            _label.extend(l.split(','))
        # keep only the unique (non-duplicated) units
        label = list(set(_label))
        cluster = Clustering.ClusterInitialization(_data, self.__mix_level,
                                                   self.__vector_size,
                                                   self.log)
        mean, covariance, alpha, clustered_data = cluster.kmeans(algorithm=1)
        # train a GMM on the pooled data
        tmp_gmm = Clustering.GMM(None, self.__vector_size, self.__mix_level)
        tmp_gmm.data = _data  # use the pooled feature matrix built above
        tmp_gmm.mean = mean
        tmp_gmm.covariance = covariance
        tmp_gmm.alpha = alpha
        # GMM Baum-Welch (EM) iterations
        tmp_gmm.baulm_welch()
        # read back the mean, covariance and mixture weights
        mean = tmp_gmm.mean
        covariance = tmp_gmm.covariance
        alpha = tmp_gmm.alpha

        for i in range(len(label)):
            hmm = self.__unit[label[i]][1]
            for j in range(1, len(hmm.profunction) - 1):
                # skip the first and last dummy (non-emitting) states
                gmm = hmm.profunction[j]
                gmm.mean = mean
                gmm.covariance = covariance
                gmm.alpha = alpha
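
Flat start, as in Example #1, initializes every state's output distribution from statistics pooled over all training frames, so every GMM begins with the same global mean and covariance. Below is a minimal NumPy sketch of that idea, independent of the project's Clustering module; the frames array, n_mix argument and the helper name are hypothetical.

import numpy as np

def flat_start_stats(frames, n_mix):
    """Compute one global mean/covariance and copy it to every mixture component.

    frames: (T, D) array of all feature vectors pooled over the corpus.
    Returns mean (n_mix, D), covariance (n_mix, D, D) and weights alpha (n_mix,).
    """
    global_mean = frames.mean(axis=0)             # (D,)
    global_cov = np.cov(frames, rowvar=False)     # (D, D)
    mean = np.tile(global_mean, (n_mix, 1))       # every component starts identical
    covariance = np.tile(global_cov, (n_mix, 1, 1))
    alpha = np.full(n_mix, 1.0 / n_mix)           # uniform mixture weights
    return mean, covariance, alpha
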
Example #2
 def __flat_start(self,
                  path_list,
                  file_count,
                  proportion=0.25,
                  step=1,
                  differentiation=True,
                  coefficient=1.):
     """
         均一起步
     计算全局均值和方差
     :param path_list: 数据路径列表
     :param file_count: 数据总量
     :param proportion: 训练数据中,用于计算全局均值和协方差的数据占比
     :param step: 在帧中跳跃选取的跳跃步长
     :param differentiation: GMM中各分模型参数差异化处理
     :param coefficient: 差异化程度,区间[0,1]
     :return:
     """
     self.log.note('flat starting...', cls='i')
     p_file_count = int(file_count * proportion)
     p_data = self.__load_audio(path_list[0][0])
     p_data = p_data[::step]
     for index in range(1, p_file_count):
         data = self.__load_audio(path_list[index][0])  # load the audio data
         data = data[::step]
         p_data = np.append(p_data, data, axis=0)
     cluster = Clustering.ClusterInitialization(p_data, 1,
                                                self.__vector_size,
                                                self.log)
     mean, covariance, alpha, clustered_data = cluster.kmeans(
         algorithm=1, cov_matrix=True)
     covariance_diagonal = covariance[0].diagonal()
     units = self.__loaded_units
     # per-component mean offset; stays zero unless differentiation is enabled
     diff_coefficient = np.zeros((self.__mix_level, 1))
     if differentiation:
         # differentiate the parameters of each component
         assert 0 <= coefficient <= 1, 'differentiation coefficient must lie in [0, 1]'
         diff_coefficient = (np.random.random(
             (self.__mix_level, 1)) - np.random.random(
                 (self.__mix_level, 1))) * coefficient
     for unit in units:
         hmm = self.init_unit(unit, new_log=True)
         gmms = hmm.profunction[1:-1]
         for g in gmms:
             g.mean = mean.repeat(
                 self.__mix_level,
                 axis=0) + diff_coefficient * covariance_diagonal
             g.covariance = covariance.repeat(self.__mix_level, axis=0)
         self.__save_parameter(unit, hmm)
     self.delete_trainInfo()
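
Example #2 avoids giving every mixture component exactly the same starting mean: it adds a small random offset per component, scaled by coefficient and by the per-dimension variance, so that EM can pull the components apart later. A standalone sketch of that perturbation step, with shapes assumed from the code above (the function name is hypothetical):

import numpy as np

def differentiate_means(global_mean, cov_diagonal, n_mix, coefficient=1.0):
    """Replicate a single global mean n_mix times and nudge each copy.

    global_mean:  (1, D) mean from a one-cluster k-means run.
    cov_diagonal: (D,) diagonal of the global covariance matrix.
    coefficient:  differentiation strength in [0, 1]; 0 keeps all copies identical.
    """
    assert 0 <= coefficient <= 1
    # one random offset per component, roughly in [-coefficient, coefficient]
    diff = (np.random.random((n_mix, 1)) - np.random.random((n_mix, 1))) * coefficient
    # scale the offset by each dimension's variance so the nudge matches the feature scale
    return global_mean.repeat(n_mix, axis=0) + diff * cov_diagonal
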
Example #3
    def __cal_gmm(self,
                  unit,
                  unit_data,
                  init=False,
                  smem=False,
                  show_q=False,
                  c_covariance=1e-3):
        """
        计算GMM
        :param unit: 当前基元
        :param unit_data: 基元数据
        :param init: 是否初始化
        :param smem: 是否进行SMEM算法
        :param show_q: 显示当前似然度
        :param c_covariance: 修正数值,纠正GMM中的Singular Matrix
        :return:
        """
        hmm = self.__unit[unit]
        gmms_num = len(hmm.profunction) - 2  # number of Gaussian mixture models

        for i in range(1, len(hmm.profunction) - 1):
            # skip the first and last dummy (non-emitting) states

            gmm = hmm.profunction[i]
            gmm.log.note('training GMM %d of %d, mixture level %d' %
                         (i, gmms_num, self.__mix_level),
                         cls='i')
            data = unit_data[i - 1]  # data assigned to this Gaussian mixture model
            gmm.add_data(data)

            if len(data) < self.__mix_level:
                gmm.log.note('too little data, skipping this group', cls='w')
                continue
            if init or gmm.mixture != self.__mix_level:  # re-cluster when initializing or when the mixture level changes
                cluster = Clustering.ClusterInitialization(
                    data, self.__mix_level, self.__vector_size, gmm.log)
                mean, covariance, alpha, clustered_data = cluster.kmeans(
                    algorithm=1, cov_matrix=True)
                gmm.mixture = self.__mix_level
                gmm.mean = mean
                gmm.covariance = covariance
                gmm.alpha = alpha
            # GMM Baum-Welch (EM) iterations
            gmm.baulm_welch(show_q=show_q,
                            smem=smem,
                            c_covariance=c_covariance)
            gmm.clear_data()  # free the cached data
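
The per-state loop in Example #3 is the usual k-means-then-EM recipe: collect the frames aligned to one HMM state, cluster them into mix_level groups, use the cluster statistics as the starting parameters, then refine with EM (the baulm_welch call, i.e. Baum-Welch/EM for a GMM); c_covariance regularizes covariances that would otherwise become singular. The same recipe expressed with scikit-learn, shown only as an independent illustration rather than the project's Clustering module:

import numpy as np
from sklearn.mixture import GaussianMixture

def fit_state_gmm(frames, mix_level, reg_covar=1e-3):
    """Fit one state's output GMM: k-means initialization followed by EM.

    frames: (T, D) feature vectors aligned to this HMM state.
    reg_covar plays the same role as c_covariance above, keeping covariances non-singular.
    """
    if len(frames) < mix_level:
        return None  # too little data; mirrors the 'skip this group' branch above
    gmm = GaussianMixture(n_components=mix_level,
                          covariance_type='full',
                          init_params='kmeans',
                          reg_covar=reg_covar)
    gmm.fit(frames)
    return gmm.means_, gmm.covariances_, gmm.weights_
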
Example #4
    def __cal_gmm(self,
                  hmm,
                  unit_data,
                  init=False,
                  smem=False,
                  show_q=False,
                  c_covariance=1e-3):
        """
        计算GMM
        :param hmm: 外部传入HMM实例
        :param unit_data: 基元数据
        :param init: 是否初始化
        :param smem: 是否进行SMEM算法
        :param show_q: 显示当前似然度
        :param c_covariance: 修正数值,纠正GMM中协方差值过小问题
        :return:
        """
        for i in range(1, self.__state_num - 1):
            # skip the first and last dummy (non-emitting) states
            gmm = hmm.profunction[i]
            data = unit_data[i - 1]  # data assigned to this Gaussian mixture model
            gmm.add_data(data)

            if len(data) < self.__mix_level:
                gmm.log.note('too little data, skipping this group', cls='w')
                continue
            if init or gmm.mixture != self.__mix_level:  # re-cluster when initializing or when the mixture level changes
                cluster = Clustering.ClusterInitialization(
                    data, self.__mix_level, self.__vector_size, gmm.log)
                mean, covariance, alpha, clustered_data = cluster.kmeans(
                    algorithm=1, cov_matrix=True)
                gmm.mixture = self.__mix_level
                gmm.mean = mean
                gmm.covariance = covariance
                gmm.alpha = alpha
            # GMM Baum-Welch (EM) iterations
            gmm.em(show_q=show_q, smem=smem, c_covariance=c_covariance)
            gmm.clear_data()  # free the cached data
Example #5
 def __cal_gmm(self, label, c=True):
     """初始化GMM"""
     hmm = self.__unit[label][1]
     for i in range(1, len(hmm.profunction) - 1):
         # skip the first and last dummy (non-emitting) states
         gmm = hmm.profunction[i]
         data = gmm.data
         if len(data) < self.__mix_level:
             continue
         if c:
             # re-cluster the data
             cluster = Clustering.ClusterInitialization(
                 data, self.__mix_level, self.__vector_size)
             μ, σ, alpha, clustered_data = cluster.kmeans(algorithm=1)
             gmm.set_k(self.__mix_level)
             gmm.set_μ(μ)
             gmm.set_sigma(σ=σ)
             gmm.set_alpha(alpha)
             # GMM Baum-Welch (EM) iterations
             gmm.baulm_welch(show_q=True, smem=True)
         else:
             gmm.baulm_welch(show_q=True)
         gmm.clear_data()  # free the cached data