# Assumes numpy (as np) and a KMeans implementation are imported at module level.
def init_q_with_kmeans(self, data):
    '''
    Initialise the EM algorithm from a k-means clustering.
    Parameters:
        data: (np.array(nb_samples, nb_features)) Samples on which EM will be run.
    '''
    # Hard-assign each sample to its k-means cluster:
    # q_e_step[i, j] = 1 if sample i belongs to cluster j, 0 otherwise.
    self.q_e_step = np.zeros([data.shape[0], self.k])
    km = KMeans(self.k)
    km.fit(data)
    prediction = km.predict(data)
    for i in range(data.shape[0]):
        self.q_e_step[i, prediction[i]] = 1
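# Illustrative sketch (not part of the class above): the same one-hot
# responsibility matrix built with vectorised indexing instead of a Python loop.
# `labels` stands in for the k-means predictions and `k` for the number of
# components; the function name is an assumption made for this example.
import numpy as np

def one_hot_responsibilities(labels, k):
    # q[i, j] = 1 if sample i was assigned to cluster j, else 0
    q = np.zeros((labels.shape[0], k))
    q[np.arange(labels.shape[0]), labels] = 1
    return q

# Example: 4 samples assigned among 3 clusters
# one_hot_responsibilities(np.array([0, 2, 1, 0]), 3)
# -> [[1,0,0], [0,0,1], [0,1,0], [1,0,0]]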
def _init_parameters(self, data):
    '''
    Initialise the mixture parameters (mu, pi, q, sigma) before running EM.
    Parameters:
        data: (np.array(nb_samples, nb_features)) Samples on which EM will be run.
    '''
    if self.init == 'random':
        # Random initialisation: k distinct samples as centres,
        # uniform weights and uniform soft assignments.
        self.mu = data[np.random.choice(data.shape[0], self.k, replace=False)]
        self.pi = [1 / self.k for j in range(self.k)]
        self.q = 1 / self.k * np.ones((data.shape[0], self.k))
    elif self.init == 'kmeans':
        # K-means initialisation: cluster centres as means, cluster proportions
        # as weights and one-hot assignments as responsibilities.
        clf = KMeans(k=self.k, random_seed=self.random_seed, init='kmeans++')
        clf.fit(data)
        self.mu = clf.centers
        self.pi = [np.sum(clf.labels == j) / data.shape[0] for j in range(self.k)]
        self.q = np.zeros((data.shape[0], self.k))
        for index, label in np.ndenumerate(clf.labels):
            self.q[index, int(label)] = 1

    # Covariance initialisation from the initial responsibilities.
    self.sigma = np.zeros((self.k, data.shape[1], data.shape[1]))
    if self.format_covariance == 'isotropic':
        for j in range(self.k):
            # Shared variance per component: weighted mean squared deviation,
            # averaged over the data dimension.
            sigma_squared = sum([self.q[i, j] * np.dot(x_i - self.mu[j, :], x_i - self.mu[j, :])
                                 for (i, x_i) in enumerate(data)]) / (data.shape[1] * np.sum(self.q[:, j]))
            self.sigma[j] = sigma_squared * np.identity(data.shape[1])
    elif self.format_covariance == 'general':
        for j in range(self.k):
            # Full covariance per component: weighted outer products of the deviations.
            mu_j = self.mu[j, :].reshape((-1, 1))
            self.sigma[j] = sum([self.q[i, j] * (x_i.reshape((-1, 1)) - mu_j).dot(x_i.reshape((-1, 1)).T - mu_j.T)
                                 for (i, x_i) in enumerate(data)]) / np.sum(self.q[:, j])
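# Illustrative sketch (standalone, not the method above): the same initial
# covariance estimates written with vectorised numpy. `data`, `q` and `mu`
# mirror the attributes used in _init_parameters; the function and its name
# are assumptions made for this example.
import numpy as np

def initial_covariances(data, q, mu, format_covariance='general'):
    n, d = data.shape
    k = mu.shape[0]
    sigma = np.zeros((k, d, d))
    for j in range(k):
        diff = data - mu[j]                  # (n, d) deviations from centre j
        n_j = q[:, j].sum()                  # soft count of component j
        if format_covariance == 'isotropic':
            # Shared variance: weighted mean squared deviation per dimension.
            sigma_sq = (q[:, j] * np.einsum('nd,nd->n', diff, diff)).sum() / (d * n_j)
            sigma[j] = sigma_sq * np.eye(d)
        else:
            # Full covariance: weighted sum of outer products of the deviations.
            sigma[j] = (q[:, j, None] * diff).T @ diff / n_j
    return sigma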