def target_spearman_correlations(data, vars=None, target_var=None):
    """Return the Spearman correlation of each variable with a target variable.

    :param data: table whose ``domain.variables`` lists the columns and which
        provides ``to_numpy_MA("Ac")``.
    :param vars: variables to correlate with the target; defaults to all
        variables in ``data.domain.variables``.
    :param target_var: variable to correlate against; defaults to the class
        variable of ``data``.
    :returns: list with one value per entry of ``vars`` (element ``[0]`` of
        ``statc.spearmanr`` for that column against the target column).
    :raises ValueError: if ``target_var`` is not given and ``is_continuous``
        rejects the class variable, or if a variable is not in the domain.
    """
    import numpy
    import statc

    if vars is None:
        vars = list(data.domain.variables)

    if target_var is None:
        if not is_continuous(data.domain.class_var):
            raise ValueError("A data with continuous class variable expected if 'target_var' is not explicitly declared.")
        target_var = data.domain.class_var

    # Translate variables into column positions of the exported array.
    domain_vars = list(data.domain.variables)
    columns = [domain_vars.index(variable) for variable in vars]
    target_column = domain_vars.index(target_var)

    # to_numpy_MA returns a one-element sequence here; unpack the masked array.
    [table] = data.to_numpy_MA("Ac")
    column_means = numpy.ma.average(table, axis=0)

    # Masked entries are replaced by the column average before ranking.
    target_values = list(
        table[:, target_column].filled(column_means[target_column]))

    return [
        statc.spearmanr(
            list(table[:, column].filled(column_means[column])),
            target_values)[0]
        for column in columns
    ]
def target_spearman_correlations(data, vars=None, target_var=None):
    """Compute Spearman correlations between selected variables and a target.

    :param data: table exposing ``domain.variables``, ``domain.class_var``
        and ``to_numpy_MA``.
    :param vars: variables to evaluate (default: every domain variable).
    :param target_var: the variable every column is compared with
        (default: the class variable, which must pass ``is_continuous``).
    :returns: list of values, one per variable in ``vars``, each taken from
        position ``[0]`` of the ``statc.spearmanr`` result.
    :raises ValueError: when no ``target_var`` is supplied and the class
        variable is not continuous.
    """
    import numpy
    import statc

    if vars is None:
        vars = list(data.domain.variables)

    if target_var is None:
        class_var = data.domain.class_var
        if is_continuous(class_var):
            target_var = class_var
        else:
            raise ValueError(
                "A data with continuous class variable expected if 'target_var' is not explicitly declared."
            )

    domain_vars = list(data.domain.variables)
    positions = [domain_vars.index(v) for v in vars]
    target_position = domain_vars.index(target_var)

    (table,) = data.to_numpy_MA("Ac")
    column_means = numpy.ma.average(table, axis=0)

    def filled_column(position):
        # Replace masked cells with the column average and return a list.
        return list(table[:, position].filled(column_means[position]))

    target_values = filled_column(target_position)
    return [
        statc.spearmanr(filled_column(position), target_values)[0]
        for position in positions
    ]
def distSpearman(x, y):
    """Distance based on Spearman's correlation coefficient of arrays x, y.

    Positions where either array is masked are dropped from both arrays
    before the coefficient is computed.

    returns distance: 1 - spearman_r
    """
    x = MA.asarray(x)
    y = MA.asarray(y)
    assert MA.rank(x) == MA.rank(y) == 1

    # Keep only the positions that are unmasked in both arrays.
    masked_in_either = MA.logical_or(MA.getmaskarray(x), MA.getmaskarray(y))
    keep = MA.logical_not(masked_in_either)

    x_values = MA.compress(keep, x).tolist()
    y_values = MA.compress(keep, y).tolist()

    spearman_r = statc.spearmanr(x_values, y_values)[0]
    return 1 - spearman_r
def __call__(self, e1, e2):
    """Return a Spearman-based distance between two examples.

    Only the positions listed in ``self.indxs`` are compared, and a position
    is skipped when ``isSpecial()`` is true for the value in either example
    (presumably an unknown/missing value -- confirm against the value type).

    :param e1: first example, indexable by the entries of ``self.indxs``.
    :param e2: second example, indexed the same way.
    :returns: ``(1 - r) / 2`` where ``r`` is element ``[0]`` of
        ``statc.spearmanr``; ``1.0`` when no usable value pairs remain or
        when the correlation cannot be computed.
    """
    X1 = []
    X2 = []
    for i in self.indxs:
        if e1[i].isSpecial() or e2[i].isSpecial():
            continue
        X1.append(float(e1[i]))
        X2.append(float(e2[i]))

    if not X1:
        return 1.0

    try:
        return (1.0 - statc.spearmanr(X1, X2)[0]) / 2.
    except Exception:
        # Best-effort fallback: any computation failure counts as the maximal
        # distance. Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return 1.0
def compute_attr_dist_matrix(data):
    """Build a symmetric matrix of Spearman-based distances between attributes.

    Each attribute column is taken from ``data.toNumpyMA("A")``, its masked
    entries are filled with the column average, and for every pair
    ``a1 > a2`` the entry is ``(1 - r) / 2`` with ``r`` being element ``[0]``
    of ``statc.spearmanr``. Diagonal entries are not assigned here.

    :param data: table exposing ``domain.attributes`` and ``toNumpyMA``.
    :returns: a ``SymMatrix`` whose ``items`` attribute is the attribute list.
    """
    import numpy
    import statc

    attrs = data.domain.attributes
    n_attrs = len(attrs)

    matrix = SymMatrix(n_attrs)
    # why not just matrix.items = attrs?
    matrix.setattr(b"items", attrs)

    values = data.toNumpyMA("A")[0]
    column_means = numpy.ma.average(values, axis=0)

    # One plain list per attribute, with masked cells replaced by the mean.
    filled_columns = []
    for col in range(n_attrs):
        filled_columns.append(
            list(numpy.ma.filled(values[:, col], column_means[col])))

    for row in range(n_attrs):
        for col in range(row):
            rho = statc.spearmanr(filled_columns[row], filled_columns[col])[0]
            matrix[row, col] = (1.0 - rho) / 2.0
    return matrix
def pairwise_spearman_correlations(data, vars=None):
    """Return a symmetric matrix of pairwise Spearman correlations.

    :param data: table exposing ``domain.variables`` and
        ``to_numpy_MA("Ac")``.
    :param vars: variables to correlate; defaults to all variables in
        ``data.domain.variables``.
    :returns: ``Orange.core.SymMatrix`` of size ``len(vars)`` where entry
        ``[i, j]`` (``i < j``) is element ``[0]`` of ``statc.spearmanr`` for
        the i-th and j-th variable. Diagonal entries are not assigned here.
    :raises ValueError: if a variable in ``vars`` is not in the domain.
    """
    import numpy
    import statc

    if vars is None:
        vars = list(data.domain.variables)

    matrix = Orange.core.SymMatrix(len(vars))

    all_vars = list(data.domain.variables)
    indices = [all_vars.index(v) for v in vars]

    (table,) = data.to_numpy_MA("Ac")
    averages = numpy.ma.average(table, axis=0)

    # Fill masked cells with the column average and convert to a list once
    # per variable, instead of redoing that work for every pair.
    columns = [list(table[:, idx].filled(averages[idx])) for idx in indices]

    n_columns = len(columns)
    for i, column_i in enumerate(columns):
        for j in range(i + 1, n_columns):
            matrix[i, j] = statc.spearmanr(column_i, columns[j])[0]
    return matrix
def pairwise_spearman_correlations(data, vars=None):
    """Compute Spearman correlations for every pair of the given variables.

    :param data: table exposing ``domain.variables`` and
        ``to_numpy_MA("Ac")``.
    :param vars: variables to include (default: every domain variable).
    :returns: ``Orange.core.SymMatrix`` with ``len(vars)`` rows; for each
        pair ``i < j`` entry ``[i, j]`` holds element ``[0]`` of
        ``statc.spearmanr``. Diagonal entries are not assigned here.
    :raises ValueError: if a variable in ``vars`` is not in the domain.
    """
    import itertools
    import numpy
    import statc

    if vars is None:
        vars = list(data.domain.variables)

    matrix = Orange.core.SymMatrix(len(vars))

    domain_vars = list(data.domain.variables)
    indices = [domain_vars.index(v) for v in vars]

    (table,) = data.to_numpy_MA("Ac")
    column_means = numpy.ma.average(table, axis=0)

    # Prepare each column exactly once: masked cells become the column
    # average, and the result is a plain list as statc.spearmanr is given.
    filled = [list(table[:, col].filled(column_means[col])) for col in indices]

    # combinations() yields (i, j) pairs with i < j in the same order as the
    # equivalent nested loops.
    for (i, first), (j, second) in itertools.combinations(enumerate(filled), 2):
        matrix[i, j] = statc.spearmanr(first, second)[0]
    return matrix
else: if classInteractions == 3: for a1 in range(len(atts)): for a2 in range(a1): matrix[a1, a2] = (1.0 - orange.PearsonCorrelation(a1, a2, inputdata, 0).r) / 2.0 else: if len(inputdata) < 3: return None import numpy, statc m = inputdata.toNumpyMA("A")[0] averages = numpy.ma.average(m, axis=0) filleds = [list(numpy.ma.filled(m[:,i], averages[i])) for i in range(len(atts))] for a1, f1 in enumerate(filleds): for a2 in range(a1): matrix[a1, a2] = (1.0 - statc.spearmanr(f1, filleds[a2])[0]) / 2.0 output_dict = {} output_dict['dm']=matrix return output_dict def cforange_hierarchical_clustering(input_dict): return {'centroids' : None, 'selected_examples' : None, 'unselected_examples' : None} class Clustering: @staticmethod def hierarchical_clustering(linkage, distance_matrix): import Orange, orange, sys linkages = [("Single linkage", orange.HierarchicalClustering.Single), ("Average linkage", orange.HierarchicalClustering.Average), ("Ward's linkage", orange.HierarchicalClustering.Ward), ("Complete linkage", orange.HierarchicalClustering.Complete)]
def computeMatrix(self):
    """Build the attribute distance matrix for ``self.data``.

    The measure is selected by ``self.classInteractions``:

    * ``< 3``  -- values exported from ``orngInteract.InteractionMatrix``
      (0: Chi2 matrix, 1: 2-interactions, otherwise: 3-interactions),
      computed on discretized data when the domain has continuous attributes;
    * ``== 3`` -- the ``.p`` field of ``orange.PearsonCorrelation``;
    * otherwise -- element ``[1]`` of ``statc.spearmanr`` on columns whose
      masked entries were filled with the column average.

    Returns the filled ``orange.SymMatrix`` (its ``items`` attribute set to
    the attributes), or ``None`` when there is no data or when the selected
    measure needs a class attribute that the data lacks.
    """
    self.error()
    if not self.data:
        return None

    atts = self.data.domain.attributes
    n_atts = len(atts)
    matrix = orange.SymMatrix(n_atts)
    matrix.setattr('items', atts)

    measure = self.classInteractions

    if measure < 3:
        # Interaction-based measures work on discrete data only; the
        # discretized copy is cached on the instance.
        if not self.data.domain.hasContinuousAttributes():
            data = self.data
        else:
            if self.discretizedData is None:
                self.discretizedData = orange.Preprocessor_discretize(
                    self.data,
                    method=orange.EquiNDiscretization(numberOfIntervals=4))
            data = self.discretizedData

        # This is ugly, but: Aleks' code which computes Chi2 requires the
        # class attribute because it prepares some common stuff for all
        # measures. If we want to use his code, we need the class variable,
        # so we prepare a fake one.
        if not data.domain.classVar:
            if measure != 0:
                self.error(
                    "The selected distance measure requires a data set with a class attribute"
                )
                return None
            classedDomain = orange.Domain(
                data.domain.attributes,
                orange.EnumVariable("foo", values=["0", "1"]))
            data = orange.ExampleTable(classedDomain, data)

        im = orngInteract.InteractionMatrix(data, dependencies_too=1)
        if measure == 0:
            diss, labels = im.exportChi2Matrix()
            off = 0
        elif measure == 1:
            # 2-interactions
            diss, labels = im.depExportDissimilarityMatrix(jaccard=1)
            off = 1
        else:
            # 3-interactions
            diss, labels = im.exportDissimilarityMatrix(jaccard=1)
            off = 1

        # Copy the exported rows into the matrix, shifted down by `off` rows.
        for i in range(n_atts - off):
            for j in range(i + 1):
                matrix[i + off, j] = diss[i][j]
    elif measure == 3:
        for a1 in range(n_atts):
            for a2 in range(a1):
                matrix[a1, a2] = orange.PearsonCorrelation(
                    a1, a2, self.data, 0).p
    else:
        import numpy
        import statc
        m = self.data.toNumpyMA("A")[0]
        averages = numpy.ma.average(m, axis=0)
        filleds = []
        for col in range(n_atts):
            filleds.append(list(numpy.ma.filled(m[:, col], averages[col])))
        for a1 in range(n_atts):
            for a2 in range(a1):
                matrix[a1, a2] = statc.spearmanr(filleds[a1], filleds[a2])[1]

    return matrix
a1, a2, inputdata, 0).r) / 2.0 else: if len(inputdata) < 3: return None import numpy, statc m = inputdata.toNumpyMA("A")[0] averages = numpy.ma.average(m, axis=0) filleds = [ list(numpy.ma.filled(m[:, i], averages[i])) for i in range(len(atts)) ] for a1, f1 in enumerate(filleds): for a2 in range(a1): matrix[a1, a2] = (1.0 - statc.spearmanr(f1, filleds[a2])[0]) / 2.0 output_dict = {} output_dict['dm'] = matrix return output_dict def cforange_hierarchical_clustering(input_dict): return { 'centroids': None, 'selected_examples': None, 'unselected_examples': None } class Clustering: @staticmethod
def computeMatrix(self): self.error(0) if self.data: atts = self.data.domain.attributes if len(atts) < 2: self.error(0, "Dataset must contain at least two attributes") return None matrix = orange.SymMatrix(len(atts)) matrix.setattr('items', atts) if self.classInteractions < 3: if self.data.domain.hasContinuousAttributes(): if self.discretizedData is None: try: self.discretizedData = orange.Preprocessor_discretize(self.data, method=orange.EquiNDiscretization(numberOfIntervals=4)) except orange.KernelException, ex: self.error(0, "An error ocured during data discretization: %s" % ex.message) return None data = self.discretizedData else: data = self.data # This is ugly, but: Aleks' code which computes Chi2 requires the class attribute because it prepares # some common stuff for all measures. If we want to use his code, we need the class variable, so we # prepare a fake one if not data.domain.classVar: if self.classInteractions == 0: classedDomain = orange.Domain(data.domain.attributes, orange.EnumVariable("foo", values=["0", "1"])) data = orange.ExampleTable(classedDomain, data) else: self.error(0, "The selected distance measure requires a data set with a class attribute") return None im = orngInteract.InteractionMatrix(data, dependencies_too=1) off = 1 if self.classInteractions == 0: diss,labels = im.exportChi2Matrix() off = 0 elif self.classInteractions == 1: (diss,labels) = im.depExportDissimilarityMatrix(jaccard=1) # 2-interactions else: (diss,labels) = im.exportDissimilarityMatrix(jaccard=1) # 3-interactions for i in range(len(atts)-off): for j in range(i+1): matrix[i+off, j] = diss[i][j] else: if self.classInteractions == 3: for a1 in range(len(atts)): for a2 in range(a1): matrix[a1, a2] = (1.0 - orange.PearsonCorrelation(a1, a2, self.data, 0).r) / 2.0 else: if len(self.data) < 3: self.error(0, "The selected distance measure requires a data set with at least 3 instances") return None import numpy, statc m = self.data.toNumpyMA("A")[0] averages = 
numpy.ma.average(m, axis=0) filleds = [list(numpy.ma.filled(m[:,i], averages[i])) for i in range(len(atts))] for a1, f1 in enumerate(filleds): for a2 in range(a1): matrix[a1, a2] = (1.0 - statc.spearmanr(f1, filleds[a2])[0]) / 2.0 return matrix