def iteration(self,data_sets):
     covs=[]
     avgs=[]
     new_data_sets=[[] for _ in range(len(data_sets))]
     # get the mean vector and covariance matrix of each current cluster
     for data_set in data_sets:
         cov=Multi_Dimension_Data_Statictis.get_covariance_matrix(data_set)
         avg=Multi_Dimension_Data_Statictis.get_average(data_set)
         covs.append(cov)
         avgs.append(avg)
     # assign each point to the cluster whose Gaussian gives it the highest density
     for data in self.data:
         max_p=0
         max_p_index=0
         for index in range(len(data_sets)):
             gauss_value=Gaussian.gaussian([[_n] for _n in data], avgs[index], covs[index])
             if gauss_value>max_p:
                 max_p=gauss_value
                 max_p_index=index
         new_data_sets[max_p_index].append(data)
     covs=[]
     avgs=[]
     # calculate the new expectation and covariance matrix from the updated clusters
     for data_set in new_data_sets:
         cov=Multi_Dimension_Data_Statictis.get_covariance_matrix(data_set)
         avg=Multi_Dimension_Data_Statictis.get_average(data_set)
         covs.append(cov)
         avgs.append(avg)
     likelihood=0
     # calculate the likelihood of the updated clusters under the new parameters
     for index in range(len(new_data_sets)):
         temp=0
         for data in new_data_sets[index]:
             gauss_value=Gaussian.gaussian([[_n] for _n in data], avgs[index], covs[index])
             temp+=gauss_value
         likelihood+=log(temp)
     return likelihood,new_data_sets
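# --- Usage sketch (assumption): how `iteration` might be driven until the likelihood
# stops improving. Only the (likelihood, new_data_sets) return contract comes from the
# method above; `run_until_converged`, `tol`, and `max_iter` are hypothetical names.
def run_until_converged(model, data_sets, tol=1e-6, max_iter=100):
    previous_likelihood = float("-inf")
    for _ in range(max_iter):
        likelihood, data_sets = model.iteration(data_sets)
        if abs(likelihood - previous_likelihood) < tol:
            break
        previous_likelihood = likelihood
    return data_sets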
    def analysis(self,k):
#         Calculate the empirical mean
        means=Multi_Dimension_Data_Statictis.get_average(self.data)
#         Calculate the deviations from the mean
        mean_subtracted_data=Matrix.minus(self.data, Matrix.multiply([[1] for _ in range(len(self.data))], Matrix.transpose(means)))
#         Find the covariance matrix
        covariance_matrix=Multi_Dimension_Data_Statictis.get_covariance_matrix(mean_subtracted_data)
#         Find the eigenvectors and eigenvalues of the covariance matrix
        eigenvalues,eigenvectors=np.linalg.eigh(np.asarray(covariance_matrix))
        eigenvalues=eigenvalues.tolist()
        eigenvectors=Matrix.transpose(eigenvectors.tolist())
#         Rearrange the eigenvectors and eigenvalues
        eigenvalue_and_eigenvector=list(zip(eigenvalues,eigenvectors))
        eigenvalue_and_eigenvector.sort(key=lambda pair: pair[0], reverse=True)
#         Choosing k eigenvectors with the largest eigenvalues
        transform_matrix=[]
        for i in range(k):
            transform_matrix.append(eigenvalue_and_eigenvector[i][1])
        return Matrix.transpose(Matrix.multiply(transform_matrix,Matrix.transpose(self.data)))
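# --- Sketch (assumption): the same PCA steps written with plain NumPy, as a cross-check
# for `analysis`. `pca_project` is a hypothetical helper; like the return statement above,
# it projects the original rows (not the centered ones) onto the top-k eigenvectors.
# Note: np.cov normalizes by n - 1, which rescales the eigenvalues but leaves the
# eigenvectors and their ordering unchanged.
import numpy as np

def pca_project(data, k):
    X = np.asarray(data, dtype=float)                # rows are samples, columns are features
    centered = X - X.mean(axis=0)                    # deviations from the empirical mean
    cov = np.cov(centered, rowvar=False)             # covariance matrix of the features
    eigenvalues, eigenvectors = np.linalg.eigh(cov)  # eigh, since the covariance is symmetric
    order = np.argsort(eigenvalues)[::-1][:k]        # indices of the k largest eigenvalues
    W = eigenvectors[:, order]                       # (d, k) matrix of top-k eigenvectors
    return X @ W                                     # (n, k) projected data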
 def test_get_variance(self):
     data=[[1,2,3],[4,5,6],[7,8,9]]
     expected_result=[[6],[6],[6]]
     actual_result=Multi_Dimension_Data_Statictis.get_variance(data)
     self.assertEqual(expected_result, actual_result)
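# --- Sketch (assumption): a `get_variance` consistent with the test above. The real
# implementation in Multi_Dimension_Data_Statictis is not shown here; this version is
# inferred from the expected result [[6], [6], [6]]: the population variance (divide by
# n, not n - 1) of each column, returned as a column vector.
def get_variance(data):
    n = len(data)
    variances = []
    for column in zip(*data):                        # one tuple of values per dimension
        mean = sum(column) / n
        variances.append([sum((x - mean) ** 2 for x in column) / n])
    return variances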