Ejemplo n.º 1
0
def target_spearman_correlations(data, vars=None, target_var=None):
    """Correlate each selected variable with a target variable (Spearman).

    Missing entries in a column are replaced by that column's average
    before the column is handed to ``statc.spearmanr``.

    :param data: table exposing ``domain.variables``, ``domain.class_var``
        and ``to_numpy_MA``.
    :param vars: variables to correlate with the target; defaults to all
        of ``data.domain.variables``.
    :param target_var: the target variable; defaults to the class variable,
        which must then satisfy ``is_continuous``.
    :returns: list with the first element of each ``statc.spearmanr``
        result, in the order of ``vars``.
    :raises ValueError: if ``target_var`` is omitted and the class variable
        is not continuous.
    """
    import numpy
    import statc

    domain_vars = list(data.domain.variables)
    selected = domain_vars if vars is None else vars

    if target_var is None:
        class_var = data.domain.class_var
        if not is_continuous(class_var):
            raise ValueError("A data with continuous class variable expected if 'target_var' is not explicitly declared.")
        target_var = class_var

    columns = [domain_vars.index(v) for v in selected]
    target_column = domain_vars.index(target_var)
    (table,) = data.to_numpy_MA("Ac")

    means = numpy.ma.average(table, axis=0)

    def filled_column(col):
        # Replace masked entries with the column average, then convert
        # to a plain list for statc.
        return list(table[:, col].filled(means[col]))

    target_values = filled_column(target_column)
    return [statc.spearmanr(filled_column(col), target_values)[0]
            for col in columns]
Ejemplo n.º 2
0
def target_spearman_correlations(data, vars=None, target_var=None):
    """Return Spearman correlations between chosen variables and a target.

    Each column is taken from ``data.to_numpy_MA("Ac")``; masked entries
    are filled with the column average before ``statc.spearmanr`` is called.

    :param data: table exposing ``domain.variables``, ``domain.class_var``
        and ``to_numpy_MA``.
    :param vars: variables to correlate; all domain variables when omitted.
    :param target_var: variable to correlate against; when omitted the
        class variable is used and must satisfy ``is_continuous``.
    :returns: list of correlation coefficients (element ``[0]`` of each
        ``statc.spearmanr`` result), one per entry of ``vars``.
    :raises ValueError: if no ``target_var`` is given and the class
        variable is not continuous.
    """
    import numpy
    import statc

    domain_vars = list(data.domain.variables)
    if vars is None:
        vars = domain_vars

    if target_var is None:
        if not is_continuous(data.domain.class_var):
            raise ValueError(
                "A data with continuous class variable expected if 'target_var' is not explicitly declared."
            )
        target_var = data.domain.class_var

    positions = [domain_vars.index(v) for v in vars]
    target_pos = domain_vars.index(target_var)
    (values, ) = data.to_numpy_MA("Ac")

    means = numpy.ma.average(values, axis=0)
    target_values = list(values[:, target_pos].filled(means[target_pos]))

    result = []
    for pos in positions:
        column = list(values[:, pos].filled(means[pos]))
        rho = statc.spearmanr(column, target_values)[0]
        result.append(rho)

    return result
Ejemplo n.º 3
0
def distSpearman(x, y):
    """Return ``1 - r`` for two 1-D arrays, ``r`` being Spearman's coefficient.

    Both inputs are converted with ``MA.asarray``. Positions that are
    masked in either input are removed from both before the remaining
    values are passed to ``statc.spearmanr``; the first element of its
    result is used as ``r``.

    An assertion checks that both arrays have rank 1.
    """
    first = MA.asarray(x)
    second = MA.asarray(y)
    assert MA.rank(first) == MA.rank(second) == 1

    # Keep only the positions where neither input is masked.
    masked_in_either = MA.logical_or(MA.getmaskarray(first),
                                     MA.getmaskarray(second))
    keep = MA.logical_not(masked_in_either)

    kept_first = MA.compress(keep, first).tolist()
    kept_second = MA.compress(keep, second).tolist()
    rho = statc.spearmanr(kept_first, kept_second)[0]
    return 1 - rho
Ejemplo n.º 4
0
 def __call__(self, e1, e2):
     """Return a Spearman-based distance between examples ``e1`` and ``e2``.

     Only the positions listed in ``self.indxs`` are compared, and a
     position is skipped when either example's value reports
     ``isSpecial()``. The remaining values are converted to ``float`` and
     passed to ``statc.spearmanr``; the result is ``(1 - r) / 2`` where
     ``r`` is the first element of that result.

     Returns ``1.0`` when no comparable positions remain or when the
     correlation cannot be computed.
     """
     X1 = []
     X2 = []
     for i in self.indxs:
         if e1[i].isSpecial() or e2[i].isSpecial():
             continue
         X1.append(float(e1[i]))
         X2.append(float(e2[i]))
     if not X1:
         return 1.0
     try:
         return (1.0 - statc.spearmanr(X1, X2)[0]) / 2.
     except Exception:
         # Best-effort fallback for inputs statc cannot correlate. Unlike a
         # bare ``except:``, this lets KeyboardInterrupt and SystemExit
         # propagate instead of silently turning them into a distance.
         return 1.0
Ejemplo n.º 5
0
def compute_attr_dist_matrix(data):
    """Build an attribute distance matrix from pairwise Spearman correlations.

    Every attribute column from ``data.toNumpyMA("A")`` has its masked
    entries filled with the column average. For each pair of attributes
    the distance ``(1 - r) / 2`` is stored, where ``r`` is the first
    element of the ``statc.spearmanr`` result.

    :param data: table exposing ``domain.attributes`` and ``toNumpyMA``.
    :returns: a ``SymMatrix`` with one row per attribute; its ``items``
        attribute is set to the attributes. Only cells with row index
        greater than column index are assigned here.
    """
    import numpy
    import statc

    attributes = data.domain.attributes
    n_attrs = len(attributes)
    dist = SymMatrix(n_attrs)

    # The original author asked whether plain assignment
    # (matrix.items = attrs) would do; setattr is kept as written.
    dist.setattr(b"items", attributes)

    table = data.toNumpyMA("A")[0]
    column_means = numpy.ma.average(table, axis=0)

    # Fill each column once so it can be reused for every pair.
    columns = []
    for col in range(n_attrs):
        columns.append(list(numpy.ma.filled(table[:, col], column_means[col])))

    for row, row_values in enumerate(columns):
        for col in range(row):
            rho = statc.spearmanr(row_values, columns[col])[0]
            dist[row, col] = (1.0 - rho) / 2.0
    return dist
Ejemplo n.º 6
0
def pairwise_spearman_correlations(data, vars=None):
    """Return a matrix of pairwise Spearman correlations between variables.

    Each selected column of ``data.to_numpy_MA("Ac")`` has its masked
    entries replaced by the column average before being passed to
    ``statc.spearmanr``.

    :param data: table exposing ``domain.variables`` and ``to_numpy_MA``.
    :param vars: variables to correlate; defaults to all of
        ``data.domain.variables``.
    :returns: an ``Orange.core.SymMatrix`` of size ``len(vars)``. Cell
        ``[i, j]`` with ``i < j`` holds the first element of the
        ``statc.spearmanr`` result for the i-th and j-th variables; the
        diagonal is not assigned here.
    """
    import numpy
    import statc

    if vars is None:
        vars = list(data.domain.variables)

    matrix = Orange.core.SymMatrix(len(vars))

    all_vars = list(data.domain.variables)
    indices = [all_vars.index(v) for v in vars]
    (data,) = data.to_numpy_MA("Ac")

    averages = numpy.ma.average(data, axis=0)

    # Fill and convert every selected column exactly once. Previously both
    # columns were re-filled and re-listed inside the inner loop for every
    # pair, repeating the same work a quadratic number of times.
    columns = [list(data[:, k].filled(averages[k])) for k in indices]

    for i, col_i in enumerate(columns):
        for j in range(i + 1, len(columns)):
            matrix[i, j] = statc.spearmanr(col_i, columns[j])[0]

    return matrix
Ejemplo n.º 7
0
def pairwise_spearman_correlations(data, vars=None):
    """Return a matrix of pairwise Spearman correlations between variables.

    Each selected column of ``data.to_numpy_MA("Ac")`` has its masked
    entries replaced by the column average before being passed to
    ``statc.spearmanr``.

    :param data: table exposing ``domain.variables`` and ``to_numpy_MA``.
    :param vars: variables to correlate; defaults to all of
        ``data.domain.variables``.
    :returns: an ``Orange.core.SymMatrix`` of size ``len(vars)``. Cell
        ``[i, j]`` with ``i < j`` holds the first element of the
        ``statc.spearmanr`` result for the i-th and j-th variables; the
        diagonal is not assigned here.
    """
    import numpy
    import statc

    if vars is None:
        vars = list(data.domain.variables)

    matrix = Orange.core.SymMatrix(len(vars))

    all_vars = list(data.domain.variables)
    indices = [all_vars.index(v) for v in vars]
    (data, ) = data.to_numpy_MA("Ac")

    averages = numpy.ma.average(data, axis=0)

    # Fill and convert every selected column exactly once rather than
    # once per pair inside the nested loop; the values passed to
    # statc.spearmanr are unchanged.
    filled = [list(data[:, k].filled(averages[k])) for k in indices]

    for i, col_i in enumerate(filled):
        for j in range(i + 1, len(filled)):
            matrix[i, j] = statc.spearmanr(col_i, filled[j])[0]

    return matrix
Ejemplo n.º 8
0
    else:
        if classInteractions == 3:
            for a1 in range(len(atts)):
                for a2 in range(a1):
                    matrix[a1, a2] = (1.0 - orange.PearsonCorrelation(a1, a2, inputdata, 0).r) / 2.0
        else:
            if len(inputdata) < 3:
                return None
            import numpy, statc
            m = inputdata.toNumpyMA("A")[0]
            averages = numpy.ma.average(m, axis=0)
            filleds = [list(numpy.ma.filled(m[:,i], averages[i])) for i in range(len(atts))]
            for a1, f1 in enumerate(filleds):
                for a2 in range(a1):
                    matrix[a1, a2] = (1.0 - statc.spearmanr(f1, filleds[a2])[0]) / 2.0
    output_dict = {}
    output_dict['dm']=matrix        
    return output_dict

def cforange_hierarchical_clustering(input_dict):
    """Return a placeholder result in which every output is ``None``.

    ``input_dict`` is accepted but not read. A new dict with the keys
    ``'centroids'``, ``'selected_examples'`` and ``'unselected_examples'``
    is returned on each call.
    """
    output_names = ('centroids', 'selected_examples', 'unselected_examples')
    return {name: None for name in output_names}

class Clustering:
    @staticmethod
    def hierarchical_clustering(linkage, distance_matrix):
        import Orange, orange, sys
        linkages = [("Single linkage", orange.HierarchicalClustering.Single),
                    ("Average linkage", orange.HierarchicalClustering.Average),
                    ("Ward's linkage", orange.HierarchicalClustering.Ward),
                    ("Complete linkage", orange.HierarchicalClustering.Complete)]
Ejemplo n.º 9
0
    def computeMatrix(self):
        """Compute an attribute-by-attribute matrix for ``self.data``.

        The measure is selected by ``self.classInteractions``:

        * ``0`` -- values from ``InteractionMatrix.exportChi2Matrix()``;
        * ``1`` -- values from ``depExportDissimilarityMatrix(jaccard=1)``;
        * other values below ``3`` -- values from
          ``exportDissimilarityMatrix(jaccard=1)``;
        * ``3`` -- the ``.p`` attribute of ``orange.PearsonCorrelation``;
        * anything above ``3`` -- element ``[1]`` of ``statc.spearmanr``.

        Returns an ``orange.SymMatrix`` whose ``items`` attribute is set to
        the data's attributes. Returns ``None`` when ``self.data`` is falsy,
        or when the data has no class attribute and the selected measure is
        one of the interaction measures other than ``0``.
        """
        # presumably clears any previously reported widget error -- confirm
        # against the definition of self.error
        self.error()
        if self.data:
            atts = self.data.domain.attributes
            matrix = orange.SymMatrix(len(atts))
            matrix.setattr('items', atts)

            if self.classInteractions < 3:
                # The interaction measures run on discretized data whenever
                # the domain has continuous attributes. The discretized table
                # is cached on the instance and only built while the cache is
                # None.
                if self.data.domain.hasContinuousAttributes():
                    if self.discretizedData is None:
                        self.discretizedData = orange.Preprocessor_discretize(
                            self.data,
                            method=orange.EquiNDiscretization(
                                numberOfIntervals=4))
                    data = self.discretizedData
                else:
                    data = self.data

                # This is ugly, but: Aleks' code which computes Chi2 requires the class attribute because it prepares
                # some common stuff for all measures. If we want to use his code, we need the class variable, so we
                # prepare a fake one
                if not data.domain.classVar:
                    if self.classInteractions == 0:
                        classedDomain = orange.Domain(
                            data.domain.attributes,
                            orange.EnumVariable("foo", values=["0", "1"]))
                        data = orange.ExampleTable(classedDomain, data)
                    else:
                        self.error(
                            "The selected distance measure requires a data set with a class attribute"
                        )
                        return None

                im = orngInteract.InteractionMatrix(data, dependencies_too=1)
                # `off` shifts the destination row when copying `diss` into
                # `matrix`: 0 for the Chi2 export, 1 for the two
                # dissimilarity exports.
                # NOTE(review): presumably the dissimilarity exports omit the
                # first row of the lower triangle -- confirm in orngInteract.
                off = 1
                if self.classInteractions == 0:
                    diss, labels = im.exportChi2Matrix()
                    off = 0
                elif self.classInteractions == 1:
                    (diss, labels) = im.depExportDissimilarityMatrix(
                        jaccard=1)  # 2-interactions
                else:
                    (diss, labels) = im.exportDissimilarityMatrix(
                        jaccard=1)  # 3-interactions

                # Copy row i of `diss` (columns 0..i) into row i + off of the
                # symmetric matrix.
                for i in range(len(atts) - off):
                    for j in range(i + 1):
                        matrix[i + off, j] = diss[i][j]

            else:
                # NOTE(review): both branches below store `.p` / element [1]
                # of the correlation result rather than a value derived from
                # the coefficient -- presumably a p-value; confirm this is the
                # intended measure.
                if self.classInteractions == 3:
                    # Only cells below the diagonal (a2 < a1) are assigned.
                    for a1 in range(len(atts)):
                        for a2 in range(a1):
                            matrix[a1, a2] = orange.PearsonCorrelation(
                                a1, a2, self.data, 0).p
                else:
                    import numpy, statc
                    m = self.data.toNumpyMA("A")[0]
                    averages = numpy.ma.average(m, axis=0)
                    # One plain list per attribute column, with masked
                    # entries replaced by that column's average.
                    filleds = [
                        list(numpy.ma.filled(m[:, i], averages[i]))
                        for i in range(len(atts))
                    ]
                    for a1, f1 in enumerate(filleds):
                        for a2 in range(a1):
                            matrix[a1, a2] = statc.spearmanr(f1,
                                                             filleds[a2])[1]

            return matrix
        else:
            return None
Ejemplo n.º 10
0
                        a1, a2, inputdata, 0).r) / 2.0
        else:
            if len(inputdata) < 3:
                return None
            import numpy, statc
            m = inputdata.toNumpyMA("A")[0]
            averages = numpy.ma.average(m, axis=0)
            filleds = [
                list(numpy.ma.filled(m[:, i], averages[i]))
                for i in range(len(atts))
            ]
            for a1, f1 in enumerate(filleds):
                for a2 in range(a1):
                    matrix[a1,
                           a2] = (1.0 -
                                  statc.spearmanr(f1, filleds[a2])[0]) / 2.0
    output_dict = {}
    output_dict['dm'] = matrix
    return output_dict


def cforange_hierarchical_clustering(input_dict):
    """Return a placeholder result whose outputs are all ``None``.

    ``input_dict`` is not inspected. The returned dict is newly created on
    every call and has the keys ``'centroids'``, ``'selected_examples'``
    and ``'unselected_examples'``.
    """
    result = {}
    for key in ('centroids', 'selected_examples', 'unselected_examples'):
        result[key] = None
    return result


class Clustering:
    @staticmethod
Ejemplo n.º 11
0
    def computeMatrix(self):
        """Compute an attribute distance matrix for ``self.data``.

        The measure is selected by ``self.classInteractions``:

        * ``0`` -- values from ``InteractionMatrix.exportChi2Matrix()``;
        * ``1`` -- values from ``depExportDissimilarityMatrix(jaccard=1)``;
        * other values below ``3`` -- values from
          ``exportDissimilarityMatrix(jaccard=1)``;
        * ``3`` -- ``(1 - r) / 2`` with ``r`` from
          ``orange.PearsonCorrelation``;
        * anything above ``3`` -- ``(1 - r) / 2`` with ``r`` the first
          element of ``statc.spearmanr``.

        When ``self.data`` is truthy, returns an ``orange.SymMatrix`` whose
        ``items`` attribute is set to the data's attributes, or ``None``
        after reporting a problem through ``self.error(0, ...)``: fewer than
        two attributes, a failed discretization, a missing class attribute,
        or fewer than three instances for the Spearman measure. No explicit
        return for falsy ``self.data`` is visible in this excerpt.
        """
        # presumably clears the error registered under id 0 -- confirm
        # against the definition of self.error
        self.error(0)
        if self.data:
            atts = self.data.domain.attributes
            if len(atts) < 2:
                self.error(0, "Dataset must contain at least two attributes")
                return None
            matrix = orange.SymMatrix(len(atts))
            matrix.setattr('items', atts)
            if self.classInteractions < 3:
                # The interaction measures run on discretized data whenever
                # the domain has continuous attributes. The discretized table
                # is cached on the instance and only built while the cache is
                # None.
                if self.data.domain.hasContinuousAttributes():
                    if self.discretizedData is None:
                        try:
                            self.discretizedData = orange.Preprocessor_discretize(self.data, method=orange.EquiNDiscretization(numberOfIntervals=4))
                        except orange.KernelException, ex:
                            self.error(0, "An error ocured during data discretization: %s" % ex.message)
                            return None
                    data = self.discretizedData
                else:
                    data = self.data

                # This is ugly, but: Aleks' code which computes Chi2 requires the class attribute because it prepares
                # some common stuff for all measures. If we want to use his code, we need the class variable, so we
                # prepare a fake one
                if not data.domain.classVar:
                    if self.classInteractions == 0:
                        classedDomain = orange.Domain(data.domain.attributes, orange.EnumVariable("foo", values=["0", "1"]))
                        data = orange.ExampleTable(classedDomain, data)
                    else:
                        self.error(0, "The selected distance measure requires a data set with a class attribute")
                        return None

                im = orngInteract.InteractionMatrix(data, dependencies_too=1)
                # `off` shifts the destination row when copying `diss` into
                # `matrix`: 0 for the Chi2 export, 1 for the two
                # dissimilarity exports.
                # NOTE(review): presumably the dissimilarity exports omit the
                # first row of the lower triangle -- confirm in orngInteract.
                off = 1
                if self.classInteractions == 0:
                    diss,labels = im.exportChi2Matrix()
                    off = 0
                elif self.classInteractions == 1:
                    (diss,labels) = im.depExportDissimilarityMatrix(jaccard=1)  # 2-interactions
                else:
                    (diss,labels) = im.exportDissimilarityMatrix(jaccard=1)  # 3-interactions

                # Copy row i of `diss` (columns 0..i) into row i + off of the
                # symmetric matrix.
                for i in range(len(atts)-off):
                    for j in range(i+1):
                        matrix[i+off, j] = diss[i][j]

            else:
                if self.classInteractions == 3:
                    # Only cells below the diagonal (a2 < a1) are assigned.
                    for a1 in range(len(atts)):
                        for a2 in range(a1):
                            matrix[a1, a2] = (1.0 - orange.PearsonCorrelation(a1, a2, self.data, 0).r) / 2.0
                else:
                    if len(self.data) < 3:
                        self.error(0, "The selected distance measure requires a data set with at least 3 instances")
                        return None
                    import numpy, statc
                    m = self.data.toNumpyMA("A")[0]
                    averages = numpy.ma.average(m, axis=0)
                    # One plain list per attribute column, with masked
                    # entries replaced by that column's average.
                    filleds = [list(numpy.ma.filled(m[:,i], averages[i])) for i in range(len(atts))]
                    for a1, f1 in enumerate(filleds):
                        for a2 in range(a1):
                            matrix[a1, a2] = (1.0 - statc.spearmanr(f1, filleds[a2])[0]) / 2.0

            return matrix