Ejemplo n.º 1
0
    def _find_separability(self, training_classes):
        """Compute the pairwise separability of the training classes.

        Every training point is given a running index equal to its position
        in the combined ``vectors`` array handed to ``self.make_gram_matrix``.
        The resulting kernel is wrapped in a fresh ``Tools`` object, which
        replaces ``self.tools`` as a side effect.

        Args:
            training_classes: mapping from class label to an iterable of
                points. Each point must provide ``tolist()`` returning a
                list (e.g. a 1-D numpy array).

        Returns:
            A tuple ``(separability, label_to_int, int_to_label)``:

            * ``separability`` -- ``(class_cnt, class_cnt)`` numpy array,
              symmetric, with 0 on the diagonal; entry ``[i, j]`` is
              ``squared_distance / (sq_radius_i + sq_radius_j)`` as reported
              by ``self.tools``.
            * ``label_to_int`` -- dict mapping each label to its row/column.
            * ``int_to_label`` -- list mapping a row/column back to its label.

        When ``self.verbose`` is true, the CPU time of the radius pass and of
        the pairwise pass is printed.
        """
        # Flatten all points into one list and remember, per class, each
        # point prefixed with its position in that flat list.
        # NOTE(review): the prefix is presumably used by Tools to address the
        # kernel matrix -- confirm against Tools.
        vectors = []
        training_classes_with_idx = {}
        for name, points in training_classes.items():
            indexed_rows = []
            for point in points:
                vector = point.tolist()
                # len(vectors) is the slot this point is about to occupy.
                indexed_rows.append([len(vectors)] + vector)
                vectors.append(vector)
            training_classes_with_idx[name] = numpy.array(indexed_rows)

        kernel = self.make_gram_matrix(numpy.array(vectors), self.gamma)
        self.tools = Tools(kernel)

        # calculate all the squared radiuses once; they are reused per pair
        if self.verbose:
            start_time = time.process_time()
        sq_radiuses = {
            name: self.tools.squared_radius(name, indexed_points)
            for name, indexed_points in training_classes_with_idx.items()
        }
        if self.verbose:
            print('sq_radiuses: %.4f' % (time.process_time() - start_time))

        def pair_separability(name_a, name_b):
            """Return squared distance over summed squared radiuses."""
            sq_dist = self.tools.squared_distance(
                name_a,
                training_classes_with_idx[name_a],
                name_b,
                training_classes_with_idx[name_b],
            )
            # NOTE(review): the denominator is 0 when both classes have a
            # zero squared radius; the outcome then depends on the numeric
            # type returned by Tools -- confirm this case cannot occur.
            return sq_dist / (sq_radiuses[name_a] + sq_radiuses[name_b])

        # mapping between labels and integer positions, in dict order
        int_to_label = list(training_classes)
        label_to_int = {label: i for i, label in enumerate(int_to_label)}
        class_cnt = len(int_to_label)

        # 2d matrix showing separability of each pair; unset cells stay inf
        if self.verbose:
            start_time = time.process_time()
        separability = numpy.full((class_cnt, class_cnt), float('inf'))
        for int_a, a in enumerate(int_to_label):
            # should be no separability with itself
            separability[int_a, int_a] = 0
            # Only visit labels after `a`: the matrix is symmetric, so each
            # unordered pair is computed once and mirrored.
            later_labels = int_to_label[int_a + 1:]
            for int_b, b in enumerate(later_labels, start=int_a + 1):
                value = pair_separability(a, b)
                separability[int_a, int_b] = value
                separability[int_b, int_a] = value
        if self.verbose:
            print('separability: %.4f' % (time.process_time() - start_time))

        return separability, label_to_int, int_to_label
 def __init__(self, kernel):
     """Keep the kernel, start with empty lookup tables and build a Tools helper.

     Args:
         kernel: stored on ``self.kernel`` and passed unchanged to ``Tools``.
             NOTE(review): presumably a precomputed kernel (Gram) matrix --
             confirm against the callers.
     """
     self.kernel = kernel
     # Both tables start empty; nothing visible here populates them.
     self.groups, self.similarity = {}, {}
     self.tools = Tools(kernel)
    def _find_separability(self, training_classes):
        """Compute the pairwise separability of the training classes.

        Every training point is given a running index equal to its position
        in the combined ``vectors`` array handed to ``self.make_gram_matrix``.
        The resulting kernel is wrapped in a fresh ``Tools`` object, which
        replaces ``self.tools`` as a side effect.

        Args:
            training_classes: mapping from class label to an iterable of
                points. Each point must provide ``tolist()`` returning a
                list (e.g. a 1-D numpy array).

        Returns:
            A tuple ``(separability, label_to_int, int_to_label)``:

            * ``separability`` -- ``(class_cnt, class_cnt)`` numpy array,
              symmetric, with 0 on the diagonal; entry ``[i, j]`` is
              ``squared_distance / (sq_radius_i + sq_radius_j)`` as reported
              by ``self.tools``.
            * ``label_to_int`` -- dict mapping each label to its row/column.
            * ``int_to_label`` -- list mapping a row/column back to its label.

        When ``self.verbose`` is true, the CPU time of the radius pass and of
        the pairwise pass is printed.
        """
        # Flatten all points into one list and remember, per class, each
        # point prefixed with its position in that flat list.
        # NOTE(review): the prefix is presumably used by Tools to address the
        # kernel matrix -- confirm against Tools.
        vectors = []
        training_classes_with_idx = {}
        for name, points in training_classes.items():
            indexed_rows = []
            for point in points:
                vector = point.tolist()
                # len(vectors) is the slot this point is about to occupy.
                indexed_rows.append([len(vectors)] + vector)
                vectors.append(vector)
            training_classes_with_idx[name] = numpy.array(indexed_rows)

        kernel = self.make_gram_matrix(numpy.array(vectors), self.gamma)
        self.tools = Tools(kernel)

        # find radius of each class once; they are reused for every pair
        if self.verbose:
            start_time = time.process_time()
        sq_radiuses = {
            name: self.tools.squared_radius(name, indexed_points)
            for name, indexed_points in training_classes_with_idx.items()
        }
        if self.verbose:
            print('train: %.4f' % (time.process_time() - start_time))

        def pair_separability(a, b):
            """Return squared distance over summed squared radiuses."""
            sq_dist = self.tools.squared_distance(
                a,
                training_classes_with_idx[a],
                b,
                training_classes_with_idx[b],
            )
            # NOTE(review): the denominator is 0 when both classes have a
            # zero squared radius; the outcome then depends on the numeric
            # type returned by Tools -- confirm this case cannot occur.
            return sq_dist / (sq_radiuses[a] + sq_radiuses[b])

        # relabelling: labels <-> integer positions, in dict order
        int_to_label = list(training_classes)
        label_to_int = {label: i for i, label in enumerate(int_to_label)}
        class_cnt = len(int_to_label)

        # find separability of each pair
        # default value is very high separability
        if self.verbose:
            start_time = time.process_time()
        separability = numpy.full((class_cnt, class_cnt), float('inf'))
        for int_a, a in enumerate(int_to_label):
            # should be no separability with itself
            separability[int_a, int_a] = 0
            # Only visit labels after `a`: the matrix is symmetric, so each
            # unordered pair is computed once and mirrored.
            later_labels = int_to_label[int_a + 1:]
            for int_b, b in enumerate(later_labels, start=int_a + 1):
                value = pair_separability(a, b)
                separability[int_a, int_b] = value
                separability[int_b, int_a] = value
        if self.verbose:
            # NOTE(review): printed with the same 'train:' label as the
            # radius timing above, so the two lines are indistinguishable.
            print('train: %.4f' % (time.process_time() - start_time))

        return separability, label_to_int, int_to_label