def setUp(self):
    """Create the matrices shared by the SymMatrix tests.

    ``self.m`` is a 5x5 matrix whose cells ``[i, j]`` with ``j <= i`` are
    filled, row by row, with the consecutive integers 0..14.
    ``self.zerom`` is a matrix constructed with dimension 0.
    """
    self.m = orange.SymMatrix(5)
    lower_triangle = [(row, col) for row in range(5) for col in range(row + 1)]
    for value, (row, col) in enumerate(lower_triangle):
        self.m[row, col] = value
    self.zerom = orange.SymMatrix(0)
def perform_orange_clustering(mss_id):
    """Cluster tasks hierarchically and return the result as a TreeNode tree.

    The pairwise distances and the id-to-name mapping are loaded with
    ``helper.load`` from a fixed location; ``mss_id`` is accepted but not
    used by this implementation.  The clustering uses average linkage.
    Every cluster becomes a ``TreeNode`` attached to its parent with weight
    1.0; a cluster with a single member is named via ``id_to_name``, any
    other cluster is named after the string form of its member tuple.  Each
    node name is printed as it is created.

    Returns:
        The ``TreeNode`` named "root" that mirrors the clustering root.
    """
    import orange
    from task_similarities import TreeNode
    import helper

    base_dir = '/fml/ag-raetsch/home/cwidmer'
    (dist_full, id_to_name) = helper.load(base_dir + "/dist")

    # Row i holds the distances from i to every later index; the row list is
    # then reversed before it is handed to SymMatrix.
    size = len(dist_full)
    rows = [[dist_full[i, j] for j in range(i + 1, size)] for i in range(size)]
    rows.reverse()

    matrix = orange.SymMatrix(rows)
    root = orange.HierarchicalClustering(
        matrix, linkage=orange.HierarchicalClustering.Average)
    root_node = TreeNode("root")

    # Breadth-first walk over (cluster, tree node) pairs.
    pending = [(root, root_node)]
    while len(pending) > 0:
        cluster, node = pending.pop(0)
        for side in ("left", "right"):
            branch = getattr(cluster, side)
            # skip empty branches
            if not branch:
                continue
            members = tuple(branch)
            if len(members) == 1:
                name = id_to_name[members[0]]
            else:
                name = str(members)
            print(name)
            child = TreeNode(name)
            node.add_child(child, 1.0)
            pending.append((branch, child))

    return root_node
def readMatrix(fn):
    """Load a symmetric matrix from the file named ``fn``.

    Files ending in ``.pkl`` or ``.sym`` are unpickled.  Any other file is
    parsed as text: the first non-blank line starts with the matrix
    dimension, optionally followed by the word ``labeled``/``labelled``;
    every following line is one tab-separated matrix row, preceded by the
    row label when the file is labelled.  Empty cells are skipped.

    Args:
        fn: path of the file to read.

    Returns:
        A ``(matrix, labels, data)`` tuple.  For pickled input ``labels`` is
        None and ``data`` is the matrix's ``items`` attribute when it has
        one.  For text input ``data`` is None and ``labels`` holds the row
        labels (empty strings when the file is unlabelled).

    Raises:
        Exception: the dimension is missing or not an integer, or a cell is
            not a valid number (reported after the whole file was read,
            for the last offending cell).
        IndexError: the file has more non-blank rows than the dimension, or
            a row has more cells than the dimension.
    """
    matrix = labels = data = None
    if os.path.splitext(fn)[1] in ('.pkl', '.sym'):
        # NOTE(review): unpickling executes arbitrary code from the file;
        # only open matrices that come from a trusted source.
        with open(fn, 'rb') as pkl_file:
            matrix = pickle.load(pkl_file)
        if hasattr(matrix, 'items'):
            data = matrix.items
        return matrix, labels, data

    msg = None
    with open(fn) as fle:
        # Find the first non-blank line; an empty file leaves header == ""
        # instead of looping forever.
        header = ""
        for lne in fle:
            header = lne.strip()
            if header:
                break
        spl = header.split()
        try:
            dim = int(spl[0])
        except (IndexError, ValueError):
            raise Exception("Matrix dimension expected in the first line")

        labeled = len(spl) > 1 and spl[1] in ["labelled", "labeled"]
        matrix = orange.SymMatrix(dim)
        labels = [] if labeled else [""] * dim

        for li, lne in enumerate(fle):
            if li >= dim:
                # Trailing blank lines are tolerated, anything else is not.
                if not lne.strip():
                    continue
                raise IndexError("File too long")
            spl = lne.split("\t")
            if labeled:
                labels.append(spl[0].strip())
                spl = spl[1:]
            if len(spl) > dim:
                # li counts data rows from 0; +2 gives the 1-based file line
                # (assuming the header is the first line of the file).
                raise IndexError("Line %i too long" % (li + 2))
            for lj, s in enumerate(spl):
                if s:
                    try:
                        matrix[li, lj] = float(s)
                    except ValueError:
                        msg = "Invalid number in line %i, column %i" % (
                            li + 2, lj)
    if msg:
        raise Exception(msg)
    return matrix, labels, data
def test_indexing(self):
    """Check symmetric element access and out-of-range index errors."""
    m = orange.SymMatrix(5)

    m[0, 0] = 42
    for i in range(5):
        for j in range(5):
            expected = 42 if (i, j) == (0, 0) else 0
            self.assertEqual(m[i, j], expected)

    # Writing [3, 2] must also be visible at the mirrored cell [2, 3].
    m[3, 2] = 42
    marked = [(0, 0), (2, 3), (3, 2)]
    for i in range(5):
        for j in range(5):
            expected = 42 if (i, j) in marked else 0
            self.assertEqual(m[i, j], expected)

    for bad_index in [(-1, 0), (5, 0), (0, -1), (0, 5), (-1, -1)]:
        with self.assertRaises(IndexError):
            m[bad_index]

    # The zero-dimensional fixture has no valid index at all.
    for bad_index in [(0, 0), (-1, -1)]:
        with self.assertRaises(IndexError):
            self.zerom[bad_index]
def cforange_example_distance(input_dict):
    """Compute the pairwise example distance matrix for a data set.

    Reads from ``input_dict``:
        'dataset': the examples to compare.
        'distanceMetrics': index (convertible to int) into the metric table
            below.
        'normalization': the string 'true' enables normalization; any other
            value disables it.

    Returns:
        ``{'dm': matrix}`` where ``matrix`` is an ``orange.SymMatrix`` with
        the data set attached as its ``items`` attribute.
    """
    import orange
    import random
    import orngClustering
    import orngMisc

    examples = input_dict['dataset']
    metric_index = int(input_dict['distanceMetrics'])
    metrics = [
        ("Euclidean", orange.ExamplesDistanceConstructor_Euclidean),
        ("Pearson Correlation",
         orngClustering.ExamplesDistanceConstructor_PearsonR),
        ("Spearman Rank Correlation",
         orngClustering.ExamplesDistanceConstructor_SpearmanR),
        ("Manhattan", orange.ExamplesDistanceConstructor_Manhattan),
        ("Hamming", orange.ExamplesDistanceConstructor_Hamming),
        ("Relief", orange.ExamplesDistanceConstructor_Relief),
    ]
    use_normalization = input_dict['normalization'] == 'true'

    constructor = metrics[metric_index][1]()
    constructor.normalize = use_normalization
    dist = constructor(examples)

    matrix = orange.SymMatrix(len(examples))
    matrix.setattr('items', examples)
    for row in range(len(examples)):
        for col in range(row + 1):
            matrix[row, col] = dist(examples[row], examples[col])

    return {'dm': matrix}
def computeMatrix(self):
    """Compute the example distance matrix and send it on "Distance Matrix".

    Does nothing when there is no data.  If the matrix cannot be allocated
    (``orange.KernelException``), the error is shown on the widget,
    ``self.matrix`` is reset to None and None is sent instead.  Otherwise
    ``self.matrix`` is filled using the currently selected metric while a
    progress bar is advanced, and the matrix is sent.
    """
    if not self.data:
        return
    data = self.data
    dist = self.metrics[self.Metrics][1](data)
    self.error(0)
    try:
        self.matrix = orange.SymMatrix(len(data))
    except orange.KernelException as ex:
        self.error(0, "Could not create distance matrix! %s" % str(ex))
        self.matrix = None
        self.send("Distance Matrix", None)
        return
    self.matrix.setattr('items', data)

    size = len(data)
    pb = OWGUI.ProgressBar(self, 100)
    # The loops below also visit the diagonal, i.e. size * (size + 1) / 2
    # cells; the milestones must be spread over that many steps.
    milestones = orngMisc.progressBarMilestones(size * (size + 1) // 2, 100)
    count = 0
    try:
        for i in range(size):
            for j in range(i + 1):
                self.matrix[i, j] = dist(data[i], data[j])
                if count in milestones:
                    pb.advance()
                count += 1
    finally:
        # Close the progress bar even if the distance function raises.
        pb.finish()
    self.send("Distance Matrix", self.matrix)
def mutual_information_matrix(data):
    """Return a symmetric matrix of mutual information for attribute pairs.

    Cell ``[a, b]`` (``a < b``) holds
    ``mutual_information(attributes[a], attributes[b], data)``; diagonal
    cells are not written and keep the matrix's initial value.
    """
    attributes = data.domain.attributes
    count = len(attributes)
    result = orange.SymMatrix(count)
    pairs = ((first, second)
             for first in range(count - 1)
             for second in range(first + 1, count))
    for first, second in pairs:
        result[first, second] = mutual_information(
            attributes[first], attributes[second], data)
    return result
def calcDistanceMatrix(self):
    """Fill ``self.distmatrix`` with the distances between all examples.

    Uses ``self.distance`` for every pair (diagonal included) and sets
    ``self.distmatrixC`` to 1 once the matrix is complete.
    """
    # other distance measures
    total = len(self.examples)
    self.distmatrix = orange.SymMatrix(total)
    cells = ((row, col) for row in range(total) for col in range(row + 1))
    for row, col in cells:
        self.distmatrix[row, col] = self.distance(self.examples[row],
                                                  self.examples[col])
    self.distmatrixC = 1
def test_knn(self):
    """Check getKNN ordering and its error on an empty matrix."""
    m = orange.SymMatrix(5, 42)
    # Row 2 gets three values smaller than the fill value 42.
    for column, value in ((3, 5), (0, 6), (1, 7)):
        m[2, column] = value
    self.assertEqual(m.getKNN(2, 3), [3, 0, 1])
    with self.assertRaises(IndexError):
        self.zerom.getKNN(0, 3)
def test_normalizations(self):
    """Check Bounds and Sigmoid normalization against expected values."""
    bounded = orange.SymMatrix(self.m)
    bounded.normalize(bounded.Normalization.Bounds)
    expected_bounds = [x / 14 for x in range(15)]
    for actual, wanted in zip(bounded.flat(), expected_bounds):
        self.assertAlmostEqual(actual, wanted, 4)

    squashed = orange.SymMatrix(self.m)
    squashed.normalize(squashed.Normalization.Sigmoid)
    expected_sigmoid = [1 / (1 + exp(-x)) for x in range(15)]
    for actual, wanted in zip(squashed.flat(), expected_sigmoid):
        self.assertAlmostEqual(actual, wanted, 4)

    # A constant matrix is expected to normalize to all zeros.
    constant = orange.SymMatrix(5, 42)
    constant.normalize(constant.Normalization.Bounds)
    self.assertEqual(constant.flat(), [0] * 15)

    # Normalizing the zero-dimensional fixture must simply not raise.
    self.zerom.normalize(constant.Normalization.Bounds)
    self.zerom.normalize(constant.Normalization.Sigmoid)
def distanceMatrix(data):
    """Return the Euclidean distance matrix of ``data``.

    The result is an ``orange.SymMatrix`` with ``data`` attached as its
    ``items`` attribute; every cell ``[i, j]`` with ``j <= i`` is computed.
    """
    measure = orange.ExamplesDistanceConstructor_Euclidean(data)
    size = len(data)
    result = orange.SymMatrix(size)
    result.setattr('items', data)
    for row in range(size):
        for col in range(row + 1):
            result[row, col] = measure(data[row], data[col])
    return result
def test():
    """Manual smoke test: show the clustering widget on the iris data.

    Loads the iris table, adds a float meta attribute named "a", computes
    the Euclidean distance matrix, hands it to an
    ``OWHierarchicalClustering`` widget and runs the Qt event loop.
    """
    app = QApplication(sys.argv)
    w = OWHierarchicalClustering()
    w.show()

    data = orange.ExampleTable("../../doc/datasets/iris.tab")
    # ``meta_id`` rather than ``id`` so the builtin is not shadowed.
    meta_id = orange.newmetaid()
    data.domain.addmeta(meta_id, orange.FloatVariable("a"))
    data.addMetaAttribute(meta_id)

    dist = orange.ExamplesDistanceConstructor_Euclidean(data)
    # The matrix is allocated once; the original allocated it twice and
    # discarded the first instance.
    matrix = orange.SymMatrix(len(data))
    matrix.setattr('items', data)
    for i in range(len(data)):
        for j in range(i + 1):
            matrix[i, j] = dist(data[i], data[j])

    w.set_matrix(matrix)
    app.exec_()
def test_construction(self):
    """Check dimension and initial cell values of new matrices."""
    corner_cells = ((0, 0), (4, 4), (0, 4), (4, 0))
    # (constructor arguments, value expected in every cell)
    cases = (((5,), 0), ((5, 42), 42))
    for ctor_args, fill in cases:
        m = orange.SymMatrix(*ctor_args)
        self.assertEqual(m.dim, 5)
        for cell in corner_cells:
            self.assertEqual(m[cell], fill)

    self.assertEqual(self.zerom.dim, 0)
    with self.assertRaises(ValueError):
        orange.SymMatrix(-1)
def construct_distance_matrix(data):
    """Construct a distance matrix for ``data`` using Euclidean distance.

    Returns an ``orange.SymMatrix`` in which every cell ``[i, j]`` with
    ``j <= i`` holds the distance between examples ``i`` and ``j``.
    """
    measure = orange.ExamplesDistanceConstructor_Euclidean(data)
    count = len(data)
    result = orange.SymMatrix(count)
    index_pairs = [(i, j) for i in range(count) for j in range(i + 1)]
    for i, j in index_pairs:
        result[i, j] = measure(data[i], data[j])
    return result
def hierarchicalClustering_attributes(data, distance=None, linkage=orange.HierarchicalClustering.Average, order=False, progressCallback=None):
    """Return a hierarchical clustering of the attributes in the data set.

    The dissimilarity between two attributes is the ``p`` field of
    ``orange.PearsonCorrelation`` for that pair.  ``distance`` is accepted
    but not used by this implementation.  When ``order`` is true the
    leaves are additionally ordered with ``orderLeaves``.
    """
    attribute_count = len(data.domain.attributes)
    matrix = orange.SymMatrix(attribute_count)
    for first in range(attribute_count):
        for second in range(first):
            correlation = orange.PearsonCorrelation(first, second, data, 0)
            matrix[first, second] = correlation.p

    root = orange.HierarchicalClustering(
        matrix, linkage=linkage, progressCallback=progressCallback)
    if order:
        orderLeaves(root, matrix, progressCallback=progressCallback)
    return root
def computeMatrix(self):
    """Compute the example distance matrix and send it on "Distance Matrix".

    Does nothing when there is no data.  The matrix is stored in
    ``self.matrix`` with the data attached as its ``items`` attribute.
    """
    if not self.data:
        return
    examples = self.data
    measure = self.metrics[self.Metrics][1](examples)
    self.matrix = orange.SymMatrix(len(examples))
    self.matrix.setattr('items', examples)
    for row in range(len(examples)):
        for col in range(row + 1):
            self.matrix[row, col] = measure(examples[row], examples[col])
    self.send("Distance Matrix", self.matrix)
def test_iris(self):
    """Cluster the iris data with every linkage and check the result."""
    data = orange.ExampleTable("iris")
    measure = orange.ExamplesDistanceConstructor_Euclidean(data)
    linkages = orange.HierarchicalClustering.Linkage
    for linkage in [linkages.Single, linkages.Average,
                    linkages.Complete, linkages.Ward]:
        # A fresh matrix is built for every linkage.
        dist = orange.SymMatrix(len(data))
        for row, example in enumerate(data):
            for col in range(row):
                dist[row, col] = measure(example, data[col])

        root = orange.HierarchicalClustering(dist, linkage=linkage)
        self.assertEqual(len(root), len(data))
        self.rectestlen(root)

        # Once objects are mapped, indexing the root yields examples.
        root.mapping.objects = data
        self.assertEqual(root[0], data[0])
def cforange_attribute_distance(input_dict):
    # Builds an attribute-by-attribute dissimilarity matrix from
    # input_dict['dataset'], using the measure selected by
    # input_dict['classInteractions'] (converted to int).
    # Returns None when there are fewer than two attributes, when
    # discretization fails, or when a class variable is required but missing.
    # NOTE(review): the visible code ends after filling the matrix without
    # returning it, and values of classInteractions >= 3 are not handled
    # here -- the snippet looks truncated; confirm against the full source.
    import orange
    import orngInteract
    inputdata = input_dict['dataset']
    discretizedData = None
    classInteractions = int(input_dict['classInteractions'])
    atts = inputdata.domain.attributes
    if len(atts) < 2:
        return None
    matrix = orange.SymMatrix(len(atts))
    matrix.setattr('items', atts)
    if classInteractions < 3:
        # The interaction measures are computed on discretized data when the
        # domain has continuous attributes (4 intervals, EquiNDiscretization).
        if inputdata.domain.hasContinuousAttributes():
            if discretizedData is None:
                try:
                    discretizedData = orange.Preprocessor_discretize(
                        inputdata,
                        method=orange.EquiNDiscretization(numberOfIntervals=4))
                except orange.KernelException, ex:
                    return None
            data = discretizedData
        else:
            data = inputdata
        # Workaround: when the data has no class variable, a placeholder
        # class ("foo" with values "0"/"1") is added for classInteractions
        # == 0; the other measures give up and return None.
        if not data.domain.classVar:
            if classInteractions == 0:
                classedDomain = orange.Domain(
                    data.domain.attributes,
                    orange.EnumVariable("foo", values=["0", "1"]))
                data = orange.ExampleTable(classedDomain, data)
            else:
                return None
        im = orngInteract.InteractionMatrix(data, dependencies_too=1)
        # off shifts the target row by one for every export except the Chi2
        # one.
        off = 1
        if classInteractions == 0:
            diss, labels = im.exportChi2Matrix()
            off = 0
        elif classInteractions == 1:
            (diss, labels) = im.depExportDissimilarityMatrix(
                jaccard=1)  # 2-interactions
        else:
            (diss, labels) = im.exportDissimilarityMatrix(
                jaccard=1)  # 3-interactions
        for i in range(len(atts) - off):
            for j in range(i + 1):
                matrix[i + off, j] = diss[i][j]
def computeMatrix(self):
    # Prepares the distance computation for self.data: instantiates the
    # selected distance constructor, applies the self.Normalize setting and
    # allocates self.matrix.  Does nothing when there is no data.
    # NOTE(review): the visible code stops after the allocation error
    # handler; `dist` and the new matrix are not used further here, so the
    # snippet looks truncated -- confirm against the full source.
    if not self.data:
        return
    data = self.data
    constructor = self.metrics[self.Metrics][1]()
    constructor.normalize = self.Normalize
    dist = constructor(data)
    # Clear error slot 0 before trying to allocate the matrix.
    self.error(0)
    try:
        self.matrix = orange.SymMatrix(len(data))
    except orange.KernelException, ex:
        # Allocation failed: report it, reset the matrix and send None on
        # the "Distances" output.
        self.error(0, "Could not create distance matrix! %s" % str(ex))
        self.matrix = None
        self.send("Distances", None)
        return
def hierarchicalClustering(data, distanceConstructor=orange.ExamplesDistanceConstructor_Euclidean, linkage=orange.HierarchicalClustering.Average, order=False, progressCallback=None):
    """Return a hierarchical clustering of the data set.

    Distances between examples come from ``distanceConstructor(data)``.
    When ``order`` is true the leaves are additionally ordered with
    ``orderLeaves``.  ``progressCallback``, if given, receives a value
    derived from the progress reported by each phase: clustering reports
    ``value * 100.0`` (halved when ordering follows) and ordering reports
    ``50.0 + value / 2``.
    """
    distance = distanceConstructor(data)
    size = len(data)
    matrix = orange.SymMatrix(size)
    for row in range(size):
        for col in range(row + 1):
            matrix[row, col] = distance(data[row], data[col])

    if progressCallback:
        phases = 2 if order else 1

        def clustering_progress(value, obj=None):
            return progressCallback(value * 100.0 / phases)

        def ordering_progress(value):
            return progressCallback(50.0 + value / 2)
    else:
        clustering_progress = None
        ordering_progress = None

    root = orange.HierarchicalClustering(
        matrix, linkage=linkage, progressCallback=clustering_progress)
    if order:
        orderLeaves(root, matrix, progressCallback=ordering_progress)
    return root
def test_transformations(self):
    """Check every transformation, via its method and via ``invert``."""
    def fixture_copy():
        # Each check starts from an untouched copy of the shared fixture.
        return orange.SymMatrix(self.m)

    negated = [-x for x in range(15)]
    m = fixture_copy()
    m.negate()
    self.assertEqual(m.flat(), negated)
    m = fixture_copy()
    m.invert(orange.SymMatrix.Transformation.Negate)
    self.assertEqual(m.flat(), negated)

    # Cell [0, 0] is 0 in the fixture; it is set to 1 before inverting.
    reciprocals = [1] + [1 / x for x in range(1, 15)]
    m = fixture_copy()
    m[0, 0] = 1
    m.invert()
    for actual, wanted in zip(m.flat(), reciprocals):
        self.assertAlmostEqual(actual, wanted)
    m = fixture_copy()
    m[0, 0] = 1
    m.invert(m.Transformation.Invert)
    for actual, wanted in zip(m.flat(), reciprocals):
        self.assertAlmostEqual(actual, wanted)

    one_minus = [1 - x for x in range(15)]
    m = fixture_copy()
    m.subtractFromOne()
    self.assertEqual(m.flat(), one_minus)
    m = fixture_copy()
    m.invert(m.Transformation.SubtractFromOne)
    self.assertEqual(m.flat(), one_minus)

    max_minus = [14 - x for x in range(15)]
    m = fixture_copy()
    m.subtractFromMax()
    self.assertEqual(m.flat(), max_minus)
    m = fixture_copy()
    m.invert(m.Transformation.SubtractFromMax)
    self.assertEqual(m.flat(), max_minus)

    # On the zero-dimensional fixture the calls must simply not raise.
    self.zerom.invert()
    self.zerom.negate()
    self.zerom.subtractFromOne()
    self.zerom.subtractFromMax()
def computeMatrix(self):
    """Compute distances between all data items and send the matrix.

    Sends None on "Distance Matrix" when there is no data.  Otherwise every
    pair ``i < j`` is compared with the currently selected metric, applied
    to the flattened ``toNumpyMA("a")[0]`` arrays of the two items, while
    the widget progress bar is advanced once per pair.
    """
    if not self.data:
        self.send("Distance Matrix", None)
        return

    size = len(self.data)
    matrix = orange.SymMatrix(size)
    matrix.setattr('items', self.data)

    # One progress step per compared pair.  A single item yields no pairs;
    # guard the division so that case no longer raises ZeroDivisionError.
    pairs = size ** 2 / 2. - size / 2.
    pbStep = 100. / pairs if pairs else 100.

    metric = self.metrics[self.Metrics][1]
    self.progressBarInit()
    for i in range(size - 1):
        for j in range(i + 1, size):
            matrix[i, j] = metric(
                MA.ravel(self.data[i].toNumpyMA("a")[0]),
                MA.ravel(self.data[j].toNumpyMA("a")[0]))
            self.progressBarAdvance(pbStep)
    self.progressBarFinished()
    self.send("Distance Matrix", matrix)
def filter(self):
    """Send the sub-matrix of items whose attribute value is in the subset.

    Only acts when ``self.subsetAttr`` is greater than 0.  The attribute is
    the one currently shown in ``self.subsetAttrCombo``; an item of
    ``self.matrix`` is kept when the string form of its value for that
    attribute occurs among the examples of ``self.subset``.  The reduced
    matrix, with the matching items attached, is sent on
    "Distance Matrix".
    """
    if not self.subsetAttr > 0:
        return

    col = str(self.subsetAttrCombo.currentText())
    wanted_values = set(str(example[col]) for example in self.subset)

    kept = []
    for index in range(len(self.matrix.items)):
        if str(self.matrix.items[index][col]) in wanted_values:
            kept.append(index)

    reduced = orange.SymMatrix(len(kept))
    for row in range(len(kept)):
        for column in range(row):
            reduced[row, column] = self.matrix[kept[row], kept[column]]

    reduced.items = self.matrix.items.getitems(kept)
    self.send("Distance Matrix", reduced)
# Example script: hierarchical clustering of the iris data with a distance
# matrix that is computed by hand.  Written for Python 2 (print statement).


def repTime(msg):
    # Hook for reporting the start of each step; the timestamped print is
    # commented out, so the function currently does nothing.
    #print "%s: %s" % (time.asctime(), msg)
    pass


def callback(f, o):
    # Prints 100 * f rounded to an integer; the trailing comma keeps the
    # output on the same line.  `o` is ignored.
    # NOTE(review): nothing in the visible code registers this callback.
    print int(round(100 * f)),


repTime("Loading data")
data = orange.ExampleTable("iris")

repTime("Computing distances")
matrix = orange.SymMatrix(len(data))
matrix.setattr("objects", data)
distance = orange.ExamplesDistanceConstructor_Euclidean(data)
# Only cells with i2 > i1 are written; the diagonal is left untouched.
for i1, ex1 in enumerate(data):
    for i2 in range(i1 + 1, len(data)):
        matrix[i1, i2] = distance(ex1, data[i2])

# NOTE(review): the message below says "single linkage" but the linkage set
# on the next lines is clustering.Average -- confirm which one is intended.
repTime("Hierarchical clustering (single linkage)")
clustering = orange.HierarchicalClustering()
clustering.linkage = clustering.Average
# presumably lets the clustering reuse the matrix storage instead of copying
# it -- verify against the HierarchicalClustering documentation
clustering.overwriteMatrix = 1
root = clustering(matrix)

repTime("Done.")
def _readTextMatrix(fle, progress):
    """Parse the text matrix format from the open file ``fle``.

    Returns ``(matrix, labels)``; raises ValueError on malformed input.
    """
    # Find the first non-blank line; exhausted input leaves header == ""
    # instead of looping forever.
    header = ""
    for lne in fle:
        header = lne.strip()
        if header:
            break
    spl = header.split()
    try:
        dim = int(spl[0])
    except (IndexError, ValueError):
        raise ValueError("Matrix dimension expected in the first line.")

    labeled = len(spl) > 1 and spl[1] in ["labelled", "labeled"]
    matrix = orange.SymMatrix(dim)
    milestones = orngMisc.progressBarMilestones(dim, 100)
    labels = [] if labeled else [""] * dim

    for li, lne in enumerate(fle):
        if li >= dim:
            # Trailing blank lines are tolerated, anything else is not.
            if not lne.strip():
                continue
            raise ValueError("File too long")
        spl = lne.split("\t")
        if labeled:
            labels.append(spl[0].strip())
            spl = spl[1:]
        if len(spl) > dim:
            # li counts data rows from 0; +2 gives the 1-based file line
            # (assuming the header is the first line of the file).
            raise ValueError("Line %i too long" % (li + 2))
        for lj, s in enumerate(spl):
            if s:
                try:
                    matrix[li, lj] = float(s)
                except ValueError:
                    raise ValueError("Invalid number in line %i, column %i"
                                     % (li + 2, lj))
        if progress and li in milestones:
            progress.advance()
    return matrix, labels


def readMatrix(fn, progress=None):
    """Load a symmetric matrix from a file name or an open text file.

    Paths ending in ``.pkl``/``.sym`` are unpickled and paths ending in
    ``.npy`` are loaded with numpy.  Everything else, including any object
    with a ``readline`` method, is parsed as text: the first non-blank line
    starts with the dimension, optionally followed by
    ``labeled``/``labelled``; each further line is a tab-separated row,
    preceded by its label when the file is labelled.

    Args:
        fn: path of the file, or an already open file-like object (left
            open for the caller).
        progress: optional object with ``advance()`` and ``finish()``.
            ``advance()`` is called at row milestones while a ``.npy`` or
            text matrix is read; ``finish()`` is called once before
            returning.

    Returns:
        ``(matrix, labels, data)``.  For pickled matrices with an ``items``
        attribute, ``data`` is set when the items are an
        ``orange.ExampleTable`` and ``labels`` when they are a list or
        another iterable.

    Raises:
        ValueError: the text input is malformed (missing dimension, too
            many rows or columns, or an invalid number).
    """
    matrix = labels = data = None
    # Detect open files by behaviour; the Python 2 only ``file`` builtin is
    # not needed and other file-like objects work as well.
    is_stream = hasattr(fn, "readline")
    extension = None if is_stream else os.path.splitext(fn)[1]

    if extension in ('.pkl', '.sym'):
        # NOTE(review): unpickling executes arbitrary code from the file;
        # only open matrices that come from a trusted source.
        with open(fn, 'rb') as pkl_file:
            matrix = pickle.load(pkl_file)
        if hasattr(matrix, 'items'):
            items = matrix.items
            if isinstance(items, orange.ExampleTable):
                data = items
            elif isinstance(items, list) or hasattr(items, "__iter__"):
                labels = items
    elif extension == '.npy':
        import numpy
        nmatrix = numpy.load(fn)
        matrix = orange.SymMatrix(len(nmatrix))
        milestones = orngMisc.progressBarMilestones(matrix.dim, 100)
        for i in range(len(nmatrix)):
            for j in range(i + 1):
                matrix[j, i] = nmatrix[i, j]
            if progress and i in milestones:
                progress.advance()
    elif is_stream:
        matrix, labels = _readTextMatrix(fn, progress)
    else:
        with open(fn) as fle:
            matrix, labels = _readTextMatrix(fle, progress)

    if progress:
        progress.finish()
    return matrix, labels, data
# Builds an angular distance matrix between the first n rows of the weighted
# U tensor and runs MDS on it.  Written for Python 2 (print statement,
# xrange).
wut = aspace.weighted_u.tensor
vecs = (wut[i, :] for i in xrange(n))
# presumably hat() returns the unit-length version of a row -- TODO confirm
normalized_vecs = [vec.hat() for vec in vecs]


def acos(x):
    # Arc cosine with the argument clamped to [-1, 1], so values slightly
    # outside that range do not reach _acos.
    if x > 1:
        return _acos(1)
    if x < -1:
        return _acos(-1)
    return _acos(x)


concept_labels = aspace.weighted_u.label_list(0)
print 'dist'
distance = orange.SymMatrix(n)
for i in range(n):
    for j in range(i + 1):
        # presumably `*` between two rows is their dot product, which makes
        # the stored value the angle between them -- TODO confirm
        distance[i, j] = acos(normalized_vecs[i] * normalized_vecs[j])
print 'setup'
mds = orngMDS.MDS(distance)
print 'run'
mds.run(100)


def get_mds_matrix():
    # Wraps the current MDS points in a LabeledView; the first axis is
    # labelled with aspace.u.label_list(0), the second axis has no labels.
    array = np.array(mds.points)
    matrix = LabeledView(DenseTensor(array), [aspace.u.label_list(0), None])
    return matrix
def computeMatrix(self):
    """Return the attribute dissimilarity matrix for the selected measure.

    ``self.classInteractions`` selects the measure: 0 uses the Chi2 export,
    1 and 2 the dissimilarity exports of ``orngInteract.InteractionMatrix``
    (computed on discretized data when the domain has continuous
    attributes), 3 the ``p`` field of ``orange.PearsonCorrelation`` and any
    other value the second element of ``statc.spearmanr`` on columns whose
    missing values are replaced by the column average.

    Returns:
        An ``orange.SymMatrix`` with the attributes attached as ``items``,
        or None when there is no data or when a measure other than 0 (among
        0..2) is requested for data without a class attribute; the latter
        also sets an error on the widget.
    """
    self.error()
    if not self.data:
        return None

    atts = self.data.domain.attributes
    matrix = orange.SymMatrix(len(atts))
    matrix.setattr('items', atts)

    if self.classInteractions < 3:
        if not self.data.domain.hasContinuousAttributes():
            data = self.data
        else:
            # The discretized copy is cached on the widget.
            if self.discretizedData is None:
                self.discretizedData = orange.Preprocessor_discretize(
                    self.data,
                    method=orange.EquiNDiscretization(numberOfIntervals=4))
            data = self.discretizedData

        # The code that computes Chi2 requires a class attribute because it
        # prepares some common stuff for all measures, so a fake class
        # variable is supplied when the data has none.
        if not data.domain.classVar:
            if self.classInteractions != 0:
                self.error(
                    "The selected distance measure requires a data set with a class attribute"
                )
                return None
            classedDomain = orange.Domain(
                data.domain.attributes,
                orange.EnumVariable("foo", values=["0", "1"]))
            data = orange.ExampleTable(classedDomain, data)

        im = orngInteract.InteractionMatrix(data, dependencies_too=1)
        if self.classInteractions == 0:
            diss, labels = im.exportChi2Matrix()
            off = 0
        elif self.classInteractions == 1:
            # 2-interactions
            diss, labels = im.depExportDissimilarityMatrix(jaccard=1)
            off = 1
        else:
            # 3-interactions
            diss, labels = im.exportDissimilarityMatrix(jaccard=1)
            off = 1

        for row in range(len(atts) - off):
            for col in range(row + 1):
                matrix[row + off, col] = diss[row][col]
    elif self.classInteractions == 3:
        for first in range(len(atts)):
            for second in range(first):
                matrix[first, second] = orange.PearsonCorrelation(
                    first, second, self.data, 0).p
    else:
        import numpy, statc
        m = self.data.toNumpyMA("A")[0]
        averages = numpy.ma.average(m, axis=0)
        filleds = []
        for column in range(len(atts)):
            filleds.append(
                list(numpy.ma.filled(m[:, column], averages[column])))
        for first, values in enumerate(filleds):
            for second in range(first):
                matrix[first, second] = statc.spearmanr(
                    values, filleds[second])[1]

    return matrix
# Example script: prints one symmetric matrix under several matrixType
# settings.  Written for Python 2 (print statement).
import orange

# 4x4 matrix with m[i, j] = (i + 1) * (j + 1), filled for j <= i.
m = orange.SymMatrix(4)
for i in range(4):
    for j in range(i + 1):
        m[i, j] = (i + 1) * (j + 1)

# Print with the initial matrixType, followed by an empty line.
print m
print

# Switch matrixType to m.Upper and print again.
m.matrixType = m.Upper
print m
print

# Switch matrixType to m.UpperFilled and print again.
m.matrixType = m.UpperFilled
print m
print

# Switch matrixType to m.Lower and print the first three rows one by one.
m.matrixType = m.Lower
for row in m[:3]:
    print row
def __init__(self, distances=None, dim=2, **kwargs):
    """Wrap an ``orangemds.MDS`` object and keep a copy of its distances.

    All arguments are passed on to ``orangemds.MDS``.  The rows obtained
    by iterating ``self.distances`` are copied into a new
    ``orange.SymMatrix`` stored as ``self.originalDistances``.
    """
    self.mds = orangemds.MDS(distances, dim, **kwargs)
    # NOTE(review): self.distances is not assigned in this method;
    # presumably the class resolves it through the wrapped object -- confirm.
    distance_rows = list(self.distances)
    self.originalDistances = orange.SymMatrix(distance_rows)
def showWidget(self):
    # Replaces the currently embedded widget (self.ow) with the one that
    # matches the type stored in self.model["model"], feeds it the data or
    # classifier it needs and installs its layout into this widget.
    self.information()

    # Detach the previously embedded widget, or drop a leftover layout.
    if self.ow is not None:
        self.ow.topWidgetPart.hide()
        self.ow.setLayout(self.layout())
    elif self.layout() is not None:
        sip.delete(self.layout())

    self.ow = None
    if self.data is None:
        self.information("No learning data given.")
        return
    if self.model is None:
        return
    if "model" not in self.model.domain:
        return
    # NOTE(review): attr is only bound when the model has a "label" value,
    # yet several branches below read it unconditionally -- confirm every
    # model that reaches them carries a label.
    if "label" in self.model.domain:
        attr = self.model["label"].value.split(', ')

    modelType = self.model["model"].value.upper()

    # Projection widgets share the same setup sequence below.  The
    # triple-T spelling "SCATTTERPLOT" is accepted as well.
    projWidget = None
    if modelType == "SCATTERPLOT" or modelType == "SCATTTERPLOT":
        projWidget = self.setWidget(OWScatterPlot.OWScatterPlot)

    if modelType == "RADVIZ":
        projWidget = self.setWidget(OWRadviz.OWRadviz)

    if modelType == "POLYVIZ":
        projWidget = self.setWidget(OWPolyviz.OWPolyviz)

    if projWidget is not None:
        self.ow.setData(self.data)
        self.ow.setShownAttributes(attr)
        self.ow.handleNewSignals()

    ################################
    ### add new model types here ###
    ################################

    if modelType == "SPCA" or modelType == "LINPROJ":
        self.setWidget(OWLinProj.OWLinProj)
        self.ow.setData(self.data)
        self.ow.setShownAttributes(attr)
        self.ow.handleNewSignals()
        # The anchor positions stored with the model are applied afterwards.
        xAnchors, yAnchors = self.model["anchors"].value
        self.ow.updateGraph(None, setAnchors=1, XAnchors=xAnchors, YAnchors=yAnchors)

    if modelType == "TREE":
        self.setWidget(OWClassificationTreeGraph.OWClassificationTreeGraph)
        classifier = self.model["classifier"].value
        self.ow.ctree(classifier)

    if modelType == "BAYES":
        self.setWidget(OWNomogram.OWNomogram)
        classifier = self.model["classifier"].value
        self.ow.classifier(classifier)

    if modelType == "KNN":
        # Keep only the labelled attributes and the class variable, then
        # show the Euclidean distance matrix of that data in an MDS widget.
        exclude = [att for att in self.data.domain if att.name not in attr + [self.data.domain.classVar.name]]
        data2 = orange.Preprocessor_ignore(self.data, attributes = exclude)
        dist = orange.ExamplesDistanceConstructor_Euclidean(data2)
        smx = orange.SymMatrix(len(data2))
        smx.setattr('items', data2)
        pb = OWGUI.ProgressBar(self, 100)
        # NOTE(review): milestones are sized for n*(n-1)/2 steps while the
        # loops below also visit the diagonal (n*(n+1)/2 steps).
        milestones = orngMisc.progressBarMilestones(len(data2)*(len(data2)-1)/2, 100)
        count = 0
        for i in range(len(data2)):
            for j in range(i+1):
                smx[i, j] = dist(data2[i], data2[j])
                if count in milestones:
                    pb.advance()
                count += 1
        pb.finish()
        self.setWidget(OWMDS.OWMDS)
        self.ow.cmatrix(smx)

    # Route the embedded widget's output through this widget and take over
    # its layout.
    if self.ow is not None:
        self.ow.send = self.send
        if self.layout() is not None:
            sip.delete(self.layout())
        self.setLayout(self.ow.layout())
        self.ow.topWidgetPart.show()

    self.update()