Esempio n. 1
0
def hierarchicalClustering_attributes(data, distance=None, linkage=orange.HierarchicalClustering.Average, order=False, progressCallback=None):
    """Return a hierarchical clustering of the attributes in the data set.

    The dissimilarity between two attributes is ``(1 - r) / 2``, where ``r``
    is their Pearson correlation coefficient, so perfectly correlated
    attributes are at distance 0 and perfectly anti-correlated ones at 1.

    Args:
        data: table whose ``data.domain.attributes`` are clustered.
        distance: not used by this function; kept so the signature stays
            compatible with existing callers.
        linkage: linkage criterion forwarded to
            ``orange.HierarchicalClustering``.
        order: when true, ``orderLeaves`` is run on the resulting tree with
            the same dissimilarity matrix.
        progressCallback: forwarded unchanged to both the clustering and the
            optional leaf-ordering step.

    Returns:
        The root object returned by ``orange.HierarchicalClustering``.
    """
    attr_count = len(data.domain.attributes)
    matrix = orange.SymMatrix(attr_count)
    for a1 in range(attr_count):
        for a2 in range(a1):
            # Use the correlation coefficient, not the test's p-value: a
            # p-value measures significance and is small for both strongly
            # correlated and strongly anti-correlated pairs, so it is not a
            # meaningful dissimilarity.
            r = orange.PearsonCorrelation(a1, a2, data, 0).r
            matrix[a1, a2] = (1.0 - r) / 2.0
    root = orange.HierarchicalClustering(matrix, linkage=linkage, progressCallback=progressCallback)
    if order:
        orderLeaves(root, matrix, progressCallback=progressCallback)
    return root
Esempio n. 2
0
            diss,labels = im.exportChi2Matrix()
            off = 0
        elif classInteractions == 1:
            (diss,labels) = im.depExportDissimilarityMatrix(jaccard=1)  # 2-interactions
        else:
            (diss,labels) = im.exportDissimilarityMatrix(jaccard=1)  # 3-interactions

        for i in range(len(atts)-off):
            for j in range(i+1):
                matrix[i+off, j] = diss[i][j]

    else:
        if classInteractions == 3:
            for a1 in range(len(atts)):
                for a2 in range(a1):
                    matrix[a1, a2] = (1.0 - orange.PearsonCorrelation(a1, a2, inputdata, 0).r) / 2.0
        else:
            if len(inputdata) < 3:
                return None
            import numpy, statc
            m = inputdata.toNumpyMA("A")[0]
            averages = numpy.ma.average(m, axis=0)
            filleds = [list(numpy.ma.filled(m[:,i], averages[i])) for i in range(len(atts))]
            for a1, f1 in enumerate(filleds):
                for a2 in range(a1):
                    matrix[a1, a2] = (1.0 - statc.spearmanr(f1, filleds[a2])[0]) / 2.0
    output_dict = {}
    output_dict['dm']=matrix        
    return output_dict

def cforange_hierarchical_clustering(input_dict):
Esempio n. 3
0
    def computeMatrix(self):
        """Build the symmetric attribute-dissimilarity matrix for ``self.data``.

        The measure is selected by ``self.classInteractions``:

        * ``0`` -- values from ``InteractionMatrix.exportChi2Matrix()``;
        * ``1`` -- ``depExportDissimilarityMatrix(jaccard=1)`` (2-interactions);
        * ``2`` -- ``exportDissimilarityMatrix(jaccard=1)`` (3-interactions);
        * ``3`` -- ``(1 - r) / 2`` with ``r`` the Pearson correlation;
        * otherwise -- ``(1 - rho) / 2`` with ``rho`` the Spearman rank
          correlation, computed after filling missing values with the
          per-attribute average.

        For measures 0-2, continuous attributes are discretized first (the
        discretized table is cached in ``self.discretizedData``).

        Returns:
            An ``orange.SymMatrix`` whose ``items`` attribute is set to the
            attributes of ``self.data``, or ``None`` when there is no data or
            the selected measure cannot be computed (an error message is
            reported through ``self.error`` in the latter case).
        """
        # Called without a message -- presumably resets the error state.
        self.error()
        if not self.data:
            return None

        atts = self.data.domain.attributes
        matrix = orange.SymMatrix(len(atts))
        matrix.setattr('items', atts)

        if self.classInteractions < 3:
            if self.data.domain.hasContinuousAttributes():
                # Discretize once and reuse the result on later calls.
                if self.discretizedData is None:
                    self.discretizedData = orange.Preprocessor_discretize(
                        self.data,
                        method=orange.EquiNDiscretization(
                            numberOfIntervals=4))
                data = self.discretizedData
            else:
                data = self.data

            # This is ugly, but: Aleks' code which computes Chi2 requires the
            # class attribute because it prepares some common stuff for all
            # measures. If we want to use his code, we need the class
            # variable, so we prepare a fake one.
            if not data.domain.classVar:
                if self.classInteractions == 0:
                    classedDomain = orange.Domain(
                        data.domain.attributes,
                        orange.EnumVariable("foo", values=["0", "1"]))
                    data = orange.ExampleTable(classedDomain, data)
                else:
                    self.error(
                        "The selected distance measure requires a data set with a class attribute"
                    )
                    return None

            im = orngInteract.InteractionMatrix(data, dependencies_too=1)
            # `off` shifts the target row: the Chi2 export is copied starting
            # at row 0 (diagonal included), the other exports at row 1.
            off = 1
            if self.classInteractions == 0:
                diss, labels = im.exportChi2Matrix()
                off = 0
            elif self.classInteractions == 1:
                (diss, labels) = im.depExportDissimilarityMatrix(
                    jaccard=1)  # 2-interactions
            else:
                (diss, labels) = im.exportDissimilarityMatrix(
                    jaccard=1)  # 3-interactions

            for i in range(len(atts) - off):
                for j in range(i + 1):
                    matrix[i + off, j] = diss[i][j]

        elif self.classInteractions == 3:
            for a1 in range(len(atts)):
                for a2 in range(a1):
                    # Use the coefficient, not the p-value: a p-value is a
                    # significance level, not a dissimilarity.
                    r = orange.PearsonCorrelation(a1, a2, self.data, 0).r
                    matrix[a1, a2] = (1.0 - r) / 2.0

        else:
            # Rank correlation is not meaningful on fewer than three
            # instances; report it instead of failing inside spearmanr.
            if len(self.data) < 3:
                self.error(
                    "The selected distance measure requires at least 3 data instances"
                )
                return None
            import numpy, statc
            m = self.data.toNumpyMA("A")[0]
            # Replace missing (masked) values by the attribute's average.
            averages = numpy.ma.average(m, axis=0)
            filleds = [
                list(numpy.ma.filled(m[:, i], averages[i]))
                for i in range(len(atts))
            ]
            for a1, f1 in enumerate(filleds):
                for a2 in range(a1):
                    # spearmanr returns (coefficient, p-value); index 0 is
                    # the coefficient, which is what a distance needs.
                    rho = statc.spearmanr(f1, filleds[a2])[0]
                    matrix[a1, a2] = (1.0 - rho) / 2.0

        return matrix