コード例 #1
0
class TestOAOSVM(TestCase):
    training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/satimage/sat-train-s.csv'
    training_set = Dataset.load(training_file)
    training_classes = Dataset.split(training_set)
    class_cnt = len(training_classes.keys())
    gamma = 0.1
    svm = OAOSVM(gamma=gamma)

    def test_train(self):
        self.svm.train(self.training_classes)

    def test_predict(self):
        errors = 0
        total = 0
        for class_name, class_samples in self.training_classes.items():
            for sample in class_samples:
                total += 1
                if self.svm.predict(sample) != class_name:
                    # wrong prediction
                    errors += 1
        # just to see the idea
        print('errors:', errors, ' total:', total)
        assert errors == 0

    def test_cross_validate(self):
        # 10 folds validation
        res = self.svm.cross_validate(10, self.training_classes)
        # this just to get the idea
        assert res == 0
コード例 #2
0
class TestGroup(TestCase):
    training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/satimage/sat-train-s.csv'
    training_set = Dataset.load(training_file)
    training_classes = Dataset.split(training_set)
    class_cnt = len(training_classes.keys())

    def test___init__(self):
        pass
コード例 #3
0
class TestSimMultiSVM(TestCase):
    training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/satimage/sat-train-s.csv'
    training_set = Dataset.load(training_file)
    training_classes = Dataset.split(training_set)
    class_cnt = len(training_classes.keys())
    gamma = 0.1
    svm = SimMultiSVM(gamma=gamma)

    def test__find_separability(self):
        # svm = SimBinarySVM(Kernel)
        (self.svm.separability, self.svm.label_to_int,
         self.svm.int_to_label) = self.svm._find_separability(
             self.training_classes)
        # print('similarity', similarity)
        assert self.svm.separability.size == self.class_cnt * self.class_cnt
        assert self.svm.separability[0].size == self.class_cnt

        # print('labelToINt:', labelToInt)
        assert len(self.svm.label_to_int.keys()) == 6

        # print('int_to_label', int_to_label)
        for idx, val in enumerate(self.svm.int_to_label):
            assert self.svm.label_to_int[val] == idx

    @pytest.mark.run(after='test__find_similarity')
    def test_Train(self):
        self.svm.train(self.training_classes)

        def runner(current):
            if current.children == None:
                return

            assert len(current.svms) == len(current.children)
            for child in current.children:
                runner(child)

        runner(self.svm.tree.root)

    @pytest.mark.run(after='test_train')
    def test_predict(self):
        errors = 0
        total = 0
        for class_name, class_samples in self.training_classes.items():
            for sample in class_samples:
                total += 1
                if self.svm.predict(sample) != class_name:
                    # wrong prediction
                    errors += 1
        # just to see the idea
        print('errors:', errors, ' total:', total)
        assert errors == 0

    @pytest.mark.run(after='test_predict')
    def test_cross_validate(self):
        # 10 folds validation
        res = self.svm.cross_validate(10, self.training_classes)
        # this just to get the idea
        assert res == 0
コード例 #4
0
class TestDataset(TestCase):
    file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/iris.csv'
    dataset = Dataset.load(file)
    splitted = Dataset.split(dataset)

    def test_Load(self):
        assert len(self.dataset.features[0]) == 4

    def test_Split(self):
        assert len(self.splitted.keys()) == 3

        sum_splitted = 0
        for name, members in self.splitted.items():
            sum_splitted += len(members)
            for each in members:
                assert len(each) == 4
        assert sum_splitted == len(self.dataset.features)
コード例 #5
0
num_workers = multiprocessing.cpu_count()
print('workers: ', num_workers)

training_files = [
    ('satimage', 'satimage/sat-train-s.csv', 'satimage/sat-test.csv'),
]

for training in training_files:
    project_name = training[0]
    print('working on project: ', project_name)

    # load dataset
    training_file = training[1]
    training_set = Dataset.load(training_file)
    training_classes = Dataset.split(training_set)

    testing_file = training[2]
    testing_set = Dataset.load(testing_file)
    testing_classes = Dataset.split(testing_set)

    best = {}

    for each in (
            ('OAO', OAOSVM),
            ('SimBinarySVM', SimBinarySVM),
            ('SimMultiSVM', SimMultiSVM),
    ):

        svm_type = each[0]
        SVM = each[1]
コード例 #6
0
class TestSimBinarySVM(TestCase):
    training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/satimage/sat-train-s.csv'
    training_set = Dataset.load(training_file)
    training_classes = Dataset.split(training_set)
    class_cnt = len(training_classes.keys())
    gamma = 1e-6
    C = 0.01
    svm = SimBinarySVM(gamma=gamma, C=C)

    # def test_MakeRBFKernel(self):
    #     self.fail()

    def test_find_separability(self):
        # svm = SimBinarySVM(Kernel)
        (self.svm.separability, self.svm.label_to_int,
         self.svm.int_to_label) = self.svm._find_separability(
             self.training_classes)
        # print('similarity', similarity)
        assert self.svm.separability.size == self.class_cnt * self.class_cnt
        assert self.svm.separability[0].size == self.class_cnt

        # print('labelToINt:', labelToInt)
        assert len(self.svm.label_to_int.keys()) == 6

        # print('intToLabel', intToLabel)
        for idx, val in enumerate(self.svm.int_to_label):
            assert self.svm.label_to_int[val] == idx

    @pytest.mark.run(after='test_find_separability')
    def test_construct_mst_graph(self):
        (self.svm.mst_graph,
         self.svm.mst_list) = self.svm._construct_mst_graph(
             self.training_classes, self.svm.separability)
        assert len(self.svm.mst_list) == self.class_cnt - 1
        assert len(self.svm.mst_graph.connected_with(0)) == self.class_cnt

        cnt = 0
        for i, row in enumerate(self.svm.mst_graph.connection):
            for j, dist in enumerate(row):
                if dist != float('inf'):
                    cnt += 1

        # the graph bidirectional
        assert cnt == (self.class_cnt - 1) * 2

    @pytest.mark.run(after='test_construct_mst_graph')
    def test_construct_tree(self):
        self.svm.tree = self.svm._construct_tree(self.svm.mst_graph,
                                                 self.svm.mst_list)

        def runner(current):
            if current.left is None and current.right is None:
                return

            assert len(
                current.val) == len(current.left.val) + len(current.right.val)

            assert set(current.val) == set(current.left.val +
                                           current.right.val)

            runner(current.left)
            runner(current.right)

        runner(self.svm.tree.root)

    @pytest.mark.run(after='test_construct_tree')
    def test_train(self):
        self.svm.train(self.training_classes)

        def runner(current):
            if current.left is None and current.right is None:
                return

            assert current.svm
            runner(current.left)
            runner(current.right)

        runner(self.svm.tree.root)

    @pytest.mark.run(after='test_train')
    def test_predict(self):
        errors = 0
        total = 0
        for class_name, class_samples in self.training_classes.items():
            for sample in class_samples:
                total += 1
                if self.svm.predict(sample) != class_name:
                    # wrong prediction
                    errors += 1
        # just to see the idea
        print('errors:', errors, ' total:', total)
        assert errors == 0

    @pytest.mark.run(after='test_predict')
    def test_cross_validate(self):
        # 10 folds validation
        res = self.svm.cross_validate(10, self.training_classes)
        # this just to get the idea
        assert res == 0

    def test_make_gram_matrix(self):
        gamma = 0.1
        vectors = []
        training_classes_with_idx = {}
        idx = 0
        for name, points in self.training_classes.items():
            this_class = training_classes_with_idx[name] = []
            for point in points:
                # give it an index
                vector = point.tolist()
                vector_with_idx = [idx] + vector
                idx += 1
                vectors.append(vector)
                this_class.append(vector_with_idx)
            training_classes_with_idx[name] = numpy.array(this_class)

        vectors = numpy.array(vectors)
        kernel = self.svm.make_gram_matrix(vectors, gamma)

        def original_kernel(a, b):
            import numpy
            return numpy.exp(-gamma * numpy.linalg.norm(a - b)**2)

        for class_name, samples in training_classes_with_idx.items():
            a = samples
            b = a[:].tolist()
            random.shuffle(b)
            b = numpy.array(b)

            for i in range(a.shape[0]):
                assert abs(
                    kernel(a[i], b[i]) -
                    original_kernel(a[i][1:], b[i][1:])) < 1e-5
コード例 #7
0
class TestSimBinarySVMORI(TestCase):
    training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/satimage/sat-train-s.csv'
    training_set = Dataset.load(training_file)
    training_classes = Dataset.split(training_set)
    class_cnt = len(training_classes.keys())
    gamma = 1e-6
    C = 0.01
    svm = SimBinarySVMORI(gamma=gamma, C=C)

    def test_create_mapping(self):
        self.label_to_int, self.int_to_label = self.svm._create_mapping(
            self.training_classes)

    @pytest.mark.run(after='test_create_mapping')
    def test_create_tree(self):
        self.label_to_int, self.int_to_label = self.svm._create_mapping(
            self.training_classes)
        self.group_mgr = self.svm._create_tree(self.training_classes,
                                               self.label_to_int)

        def runner(current):
            if current.children == None:
                return

            child_universe = []
            for child in current.children:
                child_universe += list(child.universe.keys())
            assert set(current.universe.keys()) == set(child_universe)

            for child in current.children:
                runner(child)

        runner(next(iter(self.group_mgr.groups.values())))

    @pytest.mark.run(after='test_construct_tree')
    def test_train(self):
        group_mgr = self.svm.train(self.training_classes)

        def runner(current):
            if current.children == None:
                return

            assert current.svm
            for child in current.children:
                runner(child)

        runner(next(iter(group_mgr.groups.values())))

    @pytest.mark.run(after='test_train')
    def test_predict(self):
        group_mgr = self.svm.train(self.training_classes)
        errors = 0
        total = 0
        for class_name, class_samples in self.training_classes.items():
            for sample in class_samples:
                total += 1
                if self.svm.predict(sample) != class_name:
                    # wrong prediction
                    errors += 1
        # just to see the idea
        print('errors:', errors, ' total:', total)
        assert errors == 0

    @pytest.mark.run(after='test_predict')
    def test_cross_validate(self):
        group_mgr = self.svm.train(self.training_classes)
        # 10 folds validation
        res = self.svm.cross_validate(10, self.training_classes)
        # this just to get the idea
        assert res == 0

    def test_make_gram_matrix(self):
        gamma = 0.1
        vectors = []
        training_classes_with_idx = {}
        idx = 0
        for name, points in self.training_classes.items():
            this_class = training_classes_with_idx[name] = []
            for point in points:
                # give it an index
                vector = point.tolist()
                vector_with_idx = [idx] + vector
                idx += 1
                vectors.append(vector)
                this_class.append(vector_with_idx)
            training_classes_with_idx[name] = numpy.array(this_class)

        vectors = numpy.array(vectors)
        kernel = self.svm.make_gram_matrix(vectors, gamma)

        def original_kernel(a, b):
            import numpy

            return numpy.exp(-gamma * numpy.linalg.norm(a - b)**2)

        for class_name, samples in training_classes_with_idx.items():
            a = samples
            b = a[:].tolist()
            random.shuffle(b)
            b = numpy.array(b)

            for i in range(a.shape[0]):
                assert abs(
                    kernel(a[i], b[i]) -
                    original_kernel(a[i][1:], b[i][1:])) < 1e-5