Example #1
0
class Node(object):
    """A single unit (cell) of the map.

    .. attribute:: pos

        Node position, stored exactly as passed to the constructor.

    .. attribute:: map

        The ``map`` argument passed to the constructor (``None`` by default).

    .. attribute:: vector

        The ``vector`` argument passed to the constructor (``None`` by
        default).

    .. attribute:: reference_instance

        Reference data instance (a prototype). ``None`` until assigned.

    .. attribute:: instances

        Data set with training instances that were mapped to the node.
        ``None`` until assigned.

    """
    # Legacy camelCase names; presumably aliases that forward to the new
    # attribute names (behaviour is defined by ``deprecated_attribute``).
    referenceExample = deprecated_attribute("referenceExample",
                                            "reference_instance")
    examples = deprecated_attribute("examples", "instances")

    def __init__(self, pos, map=None, vector=None):
        # Filled in later by whoever projects data onto the map.
        self.reference_instance = self.instances = None
        self.pos, self.map, self.vector = pos, map, vector
Example #2
0
class Map(object):
    """Self-organizing map (the structure). Includes methods for
    data initialization.

    .. attribute:: map_shape

        A two element tuple. :attr:`map` holds ``map_shape[0]`` rows of
        ``map_shape[1]`` nodes each.

    .. attribute:: topology

        Topology of the map (``HexagonalTopology`` or
        ``RectangularTopology``)

    .. attribute:: map

        Self-organizing map. A list of lists of :obj:`Node`.

    """

    HexagonalTopology = HexagonalTopology
    RectangularTopology = RectangularTopology
    InitializeLinear = InitializeLinear
    InitializeRandom = InitializeRandom
    NeighbourhoodGaussian = NeighbourhoodGaussian
    NeighbourhoodBubble = NeighbourhoodBubble
    NeighbourhoodEpanechicov = NeighbourhoodEpanechicov

    def __init__(self, map_shape=(20, 40), topology=HexagonalTopology):
        """Create a grid of :obj:`Node` objects positioned at ``(i, j)``.

        The node vectors are left as ``None``; call one of the
        ``initialize_map_*`` methods to fill them.
        """
        self.map_shape = map_shape
        self.topology = topology
        self.map = [[Node((i, j), self) for j in range(map_shape[1])]
                    for i in range(map_shape[0])]

    def __getitem__(self, pos):
        """ Return the node at position x, y.
        """
        x, y = pos
        return self.map[x][y]

    def __iter__(self):
        """ Iterate over all nodes in the map, row by row.
        """
        for row in self.map:
            for node in row:
                yield node

    def vectors(self):
        """Return all vectors of the map as rows in an numpy.array
        (same order as iteration over the map).
        """
        return numpy.array([node.vector for node in self])

    def unit_distances(self):
        """Return a NxN numpy.array of internode distances (based on
        node position in the map, not vector space) where N is the
        number of nodes.

        """
        coords = self.unit_coords()
        # Broadcasting yields every pairwise coordinate difference at once,
        # replacing an N*N Python-level loop with a single array expression.
        diff = coords[:, numpy.newaxis, :] - coords[numpy.newaxis, :, :]
        return numpy.sqrt(numpy.sum(diff * diff, axis=-1))

    def unit_coords(self):
        """ Return the unit coordinates of all nodes in the map
        as an numpy.array with one row per node (iteration order) and
        one column per map dimension.

        """
        n_nodes = len(list(self))
        coords = numpy.zeros((n_nodes, len(self.map_shape)))

        # Decompose the flat (row-major) node index into (row, column).
        strides = [self.map_shape[1], 1]
        inds = numpy.arange(n_nodes)
        for axis in range(len(self.map_shape)):
            # Floor division gives the same result whether or not true
            # division is in effect for ``/``.
            coords[:, axis] = inds // strides[axis]
            inds = numpy.mod(inds, strides[axis])

        # In hexagonal topology every odd map row is shifted by 0.5 (second
        # coordinate only) and the first coordinate is scaled by sqrt(0.75)
        # so that distances between neighbours are of unit size.
        if self.topology == Map.HexagonalTopology:
            odd_rows = numpy.mod(coords[:, 0], 2) != 0
            coords[odd_rows, 1] += 0.5
            coords[:, 0] *= numpy.sqrt(0.75)
        return coords

    def initialize_map_random(self, data=None, dimension=5):
        """Initialize the map nodes vectors randomly, by supplying
        either training data or dimension of the data.

        When ``data`` is given, each vector component is drawn uniformly
        between the column-wise minimum and maximum of ``data`` and
        ``dimension`` is taken from ``data.shape[1]``. Otherwise the
        components are drawn uniformly from [0, 1).

        """
        if data is not None:
            low, high = ma.min(data, 0), ma.max(data, 0)
            dimension = data.shape[1]
        else:
            low, high = numpy.zeros(dimension), numpy.ones(dimension)
        span = high - low
        for node in self:
            # One independent uniform sample per feature. A single integer
            # factor here would place nodes outside the data range.
            node.vector = low + numpy.random.rand(dimension) * span

    def initialize_map_linear(self, data, map_shape=(10, 20)):
        """ Initialize the map node vectors linearly over the subspace
        of the two most significant eigenvectors.

        ``data`` is copied, not modified. Only the length of ``map_shape``
        is used (as the number of leading eigenvectors); the node layout
        itself comes from :meth:`unit_coords`.

        """
        data = data.copy()
        dim = data.shape[1]
        mdim = len(map_shape)
        munits = len(list(self))
        mean = ma.mean(data, 0)
        cov = numpy.zeros((dim, dim))

        # Center every column on its mean.
        for i in range(dim):
            data[:, i] = data[:, i] - mean[i]

        # Covariance matrix; it is symmetric, so fill both halves from the
        # upper triangle.
        for i in range(dim):
            for j in range(i, dim):
                prod = data[:, i] * data[:, j]
                cov[i, j] = cov[j, i] = ma.sum(prod) / len(prod)

        # Keep the ``mdim`` eigenpairs with the largest eigenvalues.
        eigval, eigvec = numpy.linalg.eig(cov)
        order = numpy.argsort(eigval)[::-1][:mdim]
        eigval = eigval[order]
        eigvec = eigvec[:, order]

        # Normalize each eigenvector, then scale it by sqrt(eigenvalue).
        for i in range(mdim):
            norm = numpy.sqrt(numpy.dot(eigvec[:, i], eigvec[:, i]))
            eigvec[:, i] = eigvec[:, i] / norm * numpy.sqrt(eigval[i])

        # Rescale the grid coordinates of each axis to [0, 1] ...
        unit_coords = self.unit_coords()
        for d in range(mdim):
            low = numpy.min(unit_coords[:, d])
            high = numpy.max(unit_coords[:, d])
            if high > low:
                unit_coords[:, d] = (unit_coords[:, d] - low) / (high - low)
            else:
                # One-dimensional SOM: the axis has a single coordinate.
                unit_coords[:, d] = 0.5

        # ... and then to [-1, 1].
        unit_coords = (unit_coords - 0.5) * 2

        # Every node starts at the data mean and is displaced along the
        # scaled eigenvectors according to its grid coordinates.
        vectors = numpy.array([mean for _ in range(munits)])
        for i in range(munits):
            for d in range(mdim):
                vectors[i] = vectors[i] + unit_coords[i][d] * eigvec[:, d]

        for i, node in enumerate(self):
            node.vector = vectors[i]

    def get_u_matrix(self):
        """Return the result of calling ``getUMat`` (resolved from module
        scope, not the class attribute below) on this map.
        """
        return getUMat(self)

    getUMat = deprecated_attribute("getUMat", "get_u_matrix")
Example #3
0
class SOMMap(Orange.core.Classifier):
    """Project the data onto the inferred self-organizing map.

    :param map: a trained self-organizing map
    :type map: :obj:`Map`
    :param data: the data to be mapped on the map
    :type data: :obj:`Orange.data.Table`

    Attribute lookups that fail on this object are forwarded to ``map``.

    """
    def __init__(self, map=None, data=None):
        # ``None`` stands in for "empty" so that default-constructed
        # instances do not share one mutable list object.
        self.map = [] if map is None else map
        self.data = [] if data is None else data

        # Give every node a prototype instance built from its vector and an
        # empty table that will collect the instances mapped to it.
        for node in self.map:
            attributes = self.data.domain.attributes
            domain = Orange.data.Domain(attributes, False)
            values = [
                var(value)
                if var.varType == Orange.feature.Type.Continuous
                else var(int(value))
                for var, value in zip(attributes, node.vector)
            ]
            node.reference_instance = Orange.data.Instance(domain, values)
            node.instances = Orange.data.Table(self.data.domain)

        # Assign each data instance to its best matching node.
        for inst in self.data:
            node = self.get_best_matching_node(inst)
            node.instances.append(inst)

        if self.data and self.data.domain.class_var:
            # Nodes that received no instances fall back to the majority
            # over the whole data set.
            for node in self.map:
                node.classifier = Orange.classification.majority.MajorityLearner(
                    node.instances if node.instances else self.data)

            self.class_var = self.data.domain.class_var
        else:
            self.class_var = None

    classVar = deprecated_attribute("classVar", "class_var")
    examples = deprecated_attribute("examples", "data")

    def get_best_matching_node(self, instance):
        """Return the best matching node for a given data instance,
        i.e. the node whose vector has the smallest squared Euclidean
        distance to the instance.
        """
        instance, c, w = Orange.data.Table([instance]).toNumpyMA()
        diff = self.map.vectors() - instance
        bmu = ma.argmin(ma.sum(diff ** 2, 1))
        return list(self.map)[bmu]

    getBestMatchingNode = \
        deprecated_attribute("getBestMatchingNode",
                             "get_best_matching_node")

    def __call__(self,
                 instance,
                 what=Orange.classification.Classifier.GetValue):
        """Map `instance` onto the best matching node and predict
        its class using the majority/mean of the training data in
        that node.

        The constructor only attaches a classifier to the nodes when the
        data has a class variable.

        """
        bmu = self.get_best_matching_node(instance)
        return bmu.classifier(instance, what)

    def __getattr__(self, name):
        """Forward unknown attribute lookups to the wrapped map."""
        # Go through ``__dict__`` directly: using ``self.map`` here would
        # recurse into ``__getattr__`` whenever ``map`` is not set yet.
        try:
            return getattr(self.__dict__["map"], name)
        except (KeyError, AttributeError):
            raise AttributeError(name)

    def __iter__(self):
        """ Iterate over all nodes in the map
        """
        return iter(self.map)

    def __getitem__(self, val):
        """ Return the node at position x, y
        """
        return self.map.__getitem__(val)