def hierarchicalClustering_attributes(data, distance=None,
                                      linkage=orange.HierarchicalClustering.Average,
                                      order=False, progressCallback=None):
    """Return a hierarchical clustering of the attributes of ``data``.

    A symmetric matrix with one row per entry of ``data.domain.attributes``
    is filled pairwise from ``orange.PearsonCorrelation`` and handed to
    ``orange.HierarchicalClustering``.

    Arguments:
        data: data set whose ``domain.attributes`` are clustered.
        distance: accepted but not used by this function.
        linkage: forwarded to ``orange.HierarchicalClustering``.
        order: when true, ``orderLeaves`` is called on the resulting tree
            with the same matrix.
        progressCallback: forwarded unchanged both to
            ``orange.HierarchicalClustering`` and to ``orderLeaves``.

    Returns:
        The object returned by ``orange.HierarchicalClustering``.
    """
    attr_count = len(data.domain.attributes)
    dissimilarity = orange.SymMatrix(attr_count)

    # Only the strict lower triangle is written (row > col); row 0 has no
    # partner columns, so the outer loop can start at 1.
    for row in range(1, attr_count):
        for col in range(row):
            correlation = orange.PearsonCorrelation(row, col, data, 0)
            # NOTE(review): the `.p` field (presumably the p-value) is used
            # as the dissimilarity, not a transform of the coefficient --
            # confirm this is the intended measure.
            dissimilarity[row, col] = correlation.p

    tree = orange.HierarchicalClustering(dissimilarity,
                                         linkage=linkage,
                                         progressCallback=progressCallback)
    if order:
        orderLeaves(tree, dissimilarity, progressCallback=progressCallback)
    return tree
diss,labels = im.exportChi2Matrix() off = 0 elif classInteractions == 1: (diss,labels) = im.depExportDissimilarityMatrix(jaccard=1) # 2-interactions else: (diss,labels) = im.exportDissimilarityMatrix(jaccard=1) # 3-interactions for i in range(len(atts)-off): for j in range(i+1): matrix[i+off, j] = diss[i][j] else: if classInteractions == 3: for a1 in range(len(atts)): for a2 in range(a1): matrix[a1, a2] = (1.0 - orange.PearsonCorrelation(a1, a2, inputdata, 0).r) / 2.0 else: if len(inputdata) < 3: return None import numpy, statc m = inputdata.toNumpyMA("A")[0] averages = numpy.ma.average(m, axis=0) filleds = [list(numpy.ma.filled(m[:,i], averages[i])) for i in range(len(atts))] for a1, f1 in enumerate(filleds): for a2 in range(a1): matrix[a1, a2] = (1.0 - statc.spearmanr(f1, filleds[a2])[0]) / 2.0 output_dict = {} output_dict['dm']=matrix return output_dict def cforange_hierarchical_clustering(input_dict):
def computeMatrix(self):
    """Build the attribute dissimilarity matrix for ``self.data``.

    The measure is selected by ``self.classInteractions``:

    * 0 -- ``InteractionMatrix.exportChi2Matrix``
    * 1 -- ``InteractionMatrix.depExportDissimilarityMatrix(jaccard=1)``
    * 2 (any other value below 3) --
      ``InteractionMatrix.exportDissimilarityMatrix(jaccard=1)``
    * 3 -- ``orange.PearsonCorrelation``
    * anything else -- ``statc.spearmanr`` on columns whose masked entries
      are filled with the column average

    Returns:
        An ``orange.SymMatrix`` whose ``items`` attribute is set to the
        attributes of ``self.data``; ``None`` when ``self.data`` is falsy
        or when measure 1/2 is requested for data without a class variable
        (an error message is set on the widget in the latter case).
    """
    self.error()  # called with no arguments before anything is computed
    if not self.data:
        return None

    attributes = self.data.domain.attributes
    attr_count = len(attributes)
    result = orange.SymMatrix(attr_count)
    result.setattr('items', attributes)
    measure = self.classInteractions

    if measure < 3:
        # Interaction-based measures work on discretized data; the
        # discretized table is computed once and kept on the widget.
        if self.data.domain.hasContinuousAttributes():
            if self.discretizedData is None:
                self.discretizedData = orange.Preprocessor_discretize(
                    self.data,
                    method=orange.EquiNDiscretization(numberOfIntervals=4))
            table = self.discretizedData
        else:
            table = self.data

        # Not pretty: Aleks' Chi2 code needs a class attribute because it
        # prepares shared state for all measures. To reuse that code on
        # class-less data a fake class variable is attached -- but only for
        # the Chi2 measure; the other two report an error instead.
        if not table.domain.classVar:
            if measure != 0:
                self.error(
                    "The selected distance measure requires a data set with a class attribute"
                )
                return None
            fake_domain = orange.Domain(
                table.domain.attributes,
                orange.EnumVariable("foo", values=["0", "1"]))
            table = orange.ExampleTable(fake_domain, table)

        interactions = orngInteract.InteractionMatrix(table, dependencies_too=1)
        if measure == 0:
            values, _labels = interactions.exportChi2Matrix()
            shift = 0
        elif measure == 1:
            # 2-interactions
            values, _labels = interactions.depExportDissimilarityMatrix(jaccard=1)
            shift = 1
        else:
            # 3-interactions
            values, _labels = interactions.exportDissimilarityMatrix(jaccard=1)
            shift = 1

        # Copy the exported triangle; for the two non-Chi2 measures the
        # destination row is shifted down by one.
        for i in range(attr_count - shift):
            exported_row = values[i]
            for j in range(i + 1):
                result[i + shift, j] = exported_row[j]

    elif measure == 3:
        for row in range(1, attr_count):
            for col in range(row):
                # NOTE(review): stores `.p` (presumably the p-value), not a
                # transform of the correlation coefficient -- confirm.
                result[row, col] = orange.PearsonCorrelation(
                    row, col, self.data, 0).p

    else:
        import numpy
        import statc

        masked = self.data.toNumpyMA("A")[0]
        column_means = numpy.ma.average(masked, axis=0)
        # One plain list per attribute, masked entries replaced by the
        # column average.
        columns = [
            list(numpy.ma.filled(masked[:, k], column_means[k]))
            for k in range(attr_count)
        ]
        for row, row_values in enumerate(columns):
            for col in range(row):
                # NOTE(review): element [1] of the spearmanr result is
                # stored (presumably the p-value) -- confirm.
                result[row, col] = statc.spearmanr(row_values, columns[col])[1]

    return result