def iteration(self, data_sets):
    """Run one clustering step and score the incoming partition.

    Every sample in ``self.data`` is assigned to the cluster whose
    ``Gaussian.gaussian`` value is largest, using the average and covariance
    matrix computed from each entry of ``data_sets``.

    Args:
        data_sets: The current partition, one collection of samples per
            cluster.

    Returns:
        A ``(likehood, new_data_sets)`` tuple. ``new_data_sets`` is the
        re-assigned partition with one list per input cluster. ``likehood``
        is the sum, over the clusters of ``data_sets``, of the log of the
        summed Gaussian values of that cluster's samples.
    """
    statistics = Multi_Dimension_Data_Statictis

    def estimate_parameters():
        # Covariance matrix and average of every cluster in ``data_sets``.
        covariances, averages = [], []
        for members in data_sets:
            covariances.append(statistics.get_covariance_matrix(members))
            averages.append(statistics.get_average(members))
        return covariances, averages

    def density(sample, average, covariance):
        # The sample is handed to the Gaussian as a list of one-element rows.
        return Gaussian.gaussian([[value] for value in sample], average, covariance)

    new_data_sets = [[] for _ in data_sets]

    # Parameters of the partition we were given.
    covariances, averages = estimate_parameters()

    # Assign each sample to its best-scoring cluster. The comparison is
    # strict, so ties keep the lowest index and a sample whose values never
    # exceed 0 lands in cluster 0.
    for sample in self.data:
        best_value = 0
        best_cluster = 0
        for cluster, (average, covariance) in enumerate(zip(averages, covariances)):
            value = density(sample, average, covariance)
            if value > best_value:
                best_value, best_cluster = value, cluster
        new_data_sets[best_cluster].append(sample)

    # NOTE(review): the parameters are re-estimated from ``data_sets`` (the
    # input partition), not from ``new_data_sets``, and the likelihood below
    # is likewise evaluated on ``data_sets`` -- confirm this is intended.
    covariances, averages = estimate_parameters()

    likehood = 0
    for members, average, covariance in zip(data_sets, averages, covariances):
        cluster_total = sum(density(sample, average, covariance) for sample in members)
        likehood += log(cluster_total)

    return likehood, new_data_sets
def analysis(self, k):
    """Project ``self.data`` onto the ``k`` leading eigenvectors of its covariance.

    The covariance matrix of the mean-subtracted data is eigen-decomposed and
    the eigenvectors are ranked by descending eigenvalue. The first ``k`` of
    them form the transform applied to ``self.data``.

    Args:
        k: Number of eigenvectors to keep.

    Returns:
        The result of ``Matrix.transpose(Matrix.multiply(T, transpose(data)))``,
        where the rows of ``T`` are the selected eigenvectors.

    Raises:
        IndexError: If ``k`` exceeds the number of eigenvectors.
    """
    data = self.data

    # Empirical mean of the data.
    means = Multi_Dimension_Data_Statictis.get_average(data)

    # Subtract the mean from every sample: a column of ones multiplied by the
    # transposed mean replicates the mean once per sample.
    # (presumably ``means`` is a column vector -- verify against get_average)
    ones_column = [[1] for _ in range(len(data))]
    mean_subtracted_data = Matrix.minus(
        data, Matrix.multiply(ones_column, Matrix.transpose(means))
    )

    # Covariance matrix of the mean-subtracted data.
    covariance_matrix = Multi_Dimension_Data_Statictis.get_covariance_matrix(
        mean_subtracted_data
    )

    # ``np.mat`` was removed in NumPy 2.0; a plain ndarray yields the same
    # eigenvalues and eigenvectors from ``eigh``.
    eigenvalues, eigenvectors = np.linalg.eigh(np.asarray(covariance_matrix))
    eigenvalues = eigenvalues.tolist()
    # ``eigh`` stores eigenvectors as columns; transpose so that row ``i``
    # is the eigenvector belonging to ``eigenvalues[i]``.
    eigenvectors = Matrix.transpose(eigenvectors.tolist())

    # Rank (eigenvalue, eigenvector) pairs, largest eigenvalue first.
    ranked = sorted(zip(eigenvalues, eigenvectors), reverse=True)

    # Keep the k eigenvectors with the largest eigenvalues.
    transform_matrix = [ranked[i][1] for i in range(k)]

    # NOTE(review): the projection is applied to the raw ``self.data`` rather
    # than to the mean-subtracted data -- confirm this is intended.
    return Matrix.transpose(Matrix.multiply(transform_matrix, Matrix.transpose(data)))
def test_get_variance(self):
    # get_variance on three 3-dimensional samples is expected to return one
    # single-element row per dimension, each holding 6.
    samples = [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
    variance = Multi_Dimension_Data_Statictis.get_variance(samples)
    self.assertEqual([[6], [6], [6]], variance)