class Node(object):
    """A single unit (cell) of the self-organizing map.

    .. attribute:: pos

        Position of the node in the map.

    .. attribute:: reference_instance

        Reference data instance (a prototype) for the node.

    .. attribute:: instances

        Data set holding the training instances mapped to the node.

    """

    def __init__(self, pos, map=None, vector=None):
        # Values supplied by the caller.
        self.pos, self.map, self.vector = pos, map, vector
        # Both are left unset here; they start out as None.
        self.reference_instance = self.instances = None

    # Backward-compatible aliases for the old camelCase attribute names.
    referenceExample = deprecated_attribute("referenceExample",
                                            "reference_instance")
    examples = deprecated_attribute("examples", "instances")
class Map(object):
    """Self organizing map (the structure). Includes methods for
    data initialization.

    .. attribute:: map_shape

        A two element tuple containing the map width and height.

    .. attribute:: topology

        Topology of the map (``HexagonalTopology`` or
        ``RectangularTopology``)

    .. attribute:: map

        Self organizing map. A list of lists of :obj:`Node`.

    """

    # Module-level constants re-exported on the class for convenience.
    HexagonalTopology = HexagonalTopology
    RectangularTopology = RectangularTopology
    InitializeLinear = InitializeLinear
    InitializeRandom = InitializeRandom
    NeighbourhoodGaussian = NeighbourhoodGaussian
    NeighbourhoodBubble = NeighbourhoodBubble
    NeighbourhoodEpanechicov = NeighbourhoodEpanechicov

    def __init__(self, map_shape=(20, 40), topology=HexagonalTopology):
        """Create a ``map_shape[0]`` x ``map_shape[1]`` grid of nodes.

        :param map_shape: two element tuple with the grid dimensions
        :param topology: ``HexagonalTopology`` or ``RectangularTopology``
        """
        self.map_shape = map_shape
        self.topology = topology
        # Node vectors are left as None until one of the
        # ``initialize_map_*`` methods is called.
        self.map = [[Node((i, j), self) for j in range(map_shape[1])]
                    for i in range(map_shape[0])]

    def __getitem__(self, pos):
        """ Return the node at position x, y.
        """
        x, y = pos
        return self.map[x][y]

    def __iter__(self):
        """ Iterate over all nodes in the map (row by row).
        """
        for row in self.map:
            for node in row:
                yield node

    def vectors(self):
        """Return all vectors of the map as rows in an numpy.array.
        """
        return numpy.array([node.vector for node in self])

    def unit_distances(self):
        """Return a NxN numpy.array of internode distances (based on
        node position in the map, not vector space) where N is the number
        of nodes.

        """
        coords = self.unit_coords()
        # Pairwise coordinate differences via broadcasting; this replaces
        # an explicit N x N Python loop with a single vectorized expression.
        diff = coords[:, numpy.newaxis, :] - coords[numpy.newaxis, :, :]
        return numpy.sqrt(numpy.sum(diff * diff, axis=-1))

    def unit_coords(self):
        """ Return the unit coordinates of all nodes in the map as
        an numpy.array.

        The result has one row per node (in iteration order) and
        ``len(self.map_shape)`` columns.

        """
        nodes = list(self)
        coords = numpy.zeros((len(nodes), len(self.map_shape)))

        # Decompose the flat (row-major) node index into grid coordinates:
        # first the row (index // number of columns), then the column.
        k = [self.map_shape[1], 1]
        inds = numpy.arange(len(nodes))
        for i in range(len(self.map_shape)):
            coords[:, i] = numpy.floor(inds / k[i])
            inds = numpy.mod(inds, k[i])

        ## in hexagonal topology we move every odd map row by 0.5
        ## (only the second coordinate) and multiply all the first
        ## coordinates by sqrt(0.75) to assure that distances between
        ## neighbours are of unit size
        if self.topology == Map.HexagonalTopology:
            odd_rows = numpy.mod(coords[:, 0], 2) != 0
            coords[odd_rows, 1] += 0.5
            coords[:, 0] = coords[:, 0] * numpy.sqrt(0.75)
        return coords

    def initialize_map_random(self, data=None, dimension=5):
        """Initialize the map nodes vectors randomly, by supplying
        either training data or dimension of the data.

        When ``data`` is given, every component of a node vector is drawn
        uniformly between the column-wise minimum and maximum of ``data``
        and ``dimension`` is taken from ``data.shape[1]``. Otherwise the
        components are drawn from the unit interval.

        :param data: optional 2-D (masked) array of training data
        :param dimension: vector length used when ``data`` is None
        """
        if data is not None:
            lower, upper = ma.min(data, 0), ma.max(data, 0)
            dimension = data.shape[1]
        else:
            lower, upper = numpy.zeros(dimension), numpy.ones(dimension)
        span = upper - lower
        for node in self:
            # One independent weight in [0, 1) per component. The previous
            # code multiplied the whole range by a single random *integer*
            # from randint(0, dimension), which placed the vectors outside
            # the data range and made all components share one multiplier.
            # ``random.random()`` is used so the numbers come from the same
            # ``random`` name the old call used.
            weights = numpy.array([random.random()
                                   for _ in range(dimension)])
            node.vector = lower + weights * span

    def initialize_map_linear(self, data, map_shape=(10, 20)):
        """ Initialize the map node vectors linearly over the subspace
        of the two most significant eigenvectors.

        :param data: 2-D (masked) array of training data; it is copied,
            the caller's array is not modified
        :param map_shape: only its length is used; it sets the number of
            leading eigenvectors that span the initialization subspace
        """
        data = data.copy()  # centered in place below
        dim = data.shape[1]
        mdim = len(map_shape)
        munits = len(list(self))
        me = ma.mean(data, 0)

        # Center every column on its mean.
        for i in range(dim):
            data[:, i] = data[:, i] - me[i]

        # Covariance-like matrix of the centered data. It is symmetric,
        # so each entry is computed once and mirrored.
        A = numpy.zeros((dim, dim))
        for i in range(dim):
            for j in range(i, dim):
                c = data[:, i] * data[:, j]
                A[i, j] = A[j, i] = ma.sum(c) / len(c)

        # Keep the ``mdim`` eigenvectors with the largest eigenvalues.
        eigval, eigvec = numpy.linalg.eig(A)
        ind = list(reversed(numpy.argsort(eigval)))
        eigval = eigval[ind[:mdim]]
        eigvec = eigvec[:, ind[:mdim]]

        # Normalize each eigenvector and scale it by sqrt(eigenvalue).
        for i in range(mdim):
            eigvec[:, i] = eigvec[:, i] / numpy.sqrt(
                numpy.dot(eigvec[:, i], eigvec[:, i])) * numpy.sqrt(eigval[i])

        # Rescale the grid coordinates of every axis to [-1, 1].
        unit_coords = self.unit_coords()
        for d in range(mdim):
            hi = numpy.max(unit_coords[:, d])
            lo = numpy.min(unit_coords[:, d])
            if hi > lo:
                unit_coords[:, d] = (unit_coords[:, d] - lo) / (hi - lo)
            ## in case of one-dimensional SOM
            else:
                unit_coords[:, d] = 0.5
        unit_coords = (unit_coords - 0.5) * 2

        # Start every node at the data mean and move it along the scaled
        # eigenvectors according to its (rescaled) grid position.
        vectors = numpy.array([me for i in range(munits)])
        for i in range(munits):
            for d in range(mdim):
                vectors[i] = vectors[i] + unit_coords[i][d] * eigvec[:, d]

        for i, node in enumerate(self):
            node.vector = vectors[i]

    def get_u_matrix(self):
        """Return the result of the module-level ``getUMat`` function
        applied to this map.
        """
        return getUMat(self)

    # Backward-compatible alias for the old camelCase method name.
    getUMat = deprecated_attribute("getUMat", "get_u_matrix")
class SOMMap(Orange.core.Classifier):
    """Project the data onto the inferred self-organizing map.

    :param map: a trained self-organizing map
    :type map: :obj:`Map`

    :param data: the data to be mapped on the map
    :type data: :obj:`Orange.data.Table`

    """

    def __init__(self, map=None, data=None):
        # ``None`` stands in for "empty": the former ``[]`` defaults were
        # single list objects shared by every instance created without
        # arguments.
        self.map = [] if map is None else map
        self.data = [] if data is None else data

        # Give every node a prototype instance built from its vector and
        # an empty table for the training instances that map to it.
        for node in self.map:
            attributes = self.data.domain.attributes
            values = [
                (var(value)
                 if var.varType == Orange.feature.Type.Continuous
                 else var(int(value)))
                for var, value in zip(attributes, node.vector)]
            node.reference_instance = Orange.data.Instance(
                Orange.data.Domain(attributes, False), values)
            node.instances = Orange.data.Table(self.data.domain)

        # Assign each training instance to its best matching node.
        for inst in self.data:
            node = self.get_best_matching_node(inst)
            node.instances.append(inst)

        if self.data and self.data.domain.class_var:
            # Per-node majority model; nodes that received no instances
            # fall back to the whole data set.
            for node in self.map:
                node.classifier = \
                    Orange.classification.majority.MajorityLearner(
                        node.instances if node.instances else self.data)

            self.class_var = self.data.domain.class_var
        else:
            self.class_var = None

    # Backward-compatible aliases for the old camelCase attribute names.
    classVar = deprecated_attribute("classVar", "class_var")
    examples = deprecated_attribute("examples", "data")

    def get_best_matching_node(self, instance):
        """Return the best matching node for a given data instance,
        i.e. the node whose vector has the smallest squared Euclidean
        distance to the instance.
        """
        instance, c, w = Orange.data.Table([instance]).toNumpyMA()
        vectors = self.map.vectors()
        Dist = vectors - instance
        bmu = ma.argmin(ma.sum(Dist ** 2, 1))
        return list(self.map)[bmu]

    getBestMatchingNode = \
        deprecated_attribute("getBestMatchingNode", "get_best_matching_node")

    def __call__(self, instance,
                 what=Orange.classification.Classifier.GetValue):
        """Map `instance` onto the best matching node and predict
        its class using the majority/mean of the training data in
        that node.

        Node classifiers are only created when the training data has a
        class variable.

        """
        bmu = self.get_best_matching_node(instance)
        return bmu.classifier(instance, what)

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped map."""
        try:
            # Go through __dict__ so that a missing ``map`` attribute
            # cannot re-enter __getattr__.
            return getattr(self.__dict__["map"], name)
        except (KeyError, AttributeError):
            raise AttributeError(name)

    def __iter__(self):
        """ Iterate over all nodes in the map
        """
        return iter(self.map)

    def __getitem__(self, val):
        """ Return the node at position x, y
        """
        return self.map.__getitem__(val)