class TestOAOSVM(TestCase):
    """Tests for ``OAOSVM`` using the satimage training sample.

    The CSV file is loaded and split once, while the class body executes,
    and the resulting objects are shared by all tests as class attributes.
    """

    training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/satimage/sat-train-s.csv'
    training_set = Dataset.load(training_file)
    training_classes = Dataset.split(training_set)
    class_cnt = len(training_classes.keys())

    gamma = 0.1
    # A single classifier instance is shared by every test in this class.
    svm = OAOSVM(gamma=gamma)

    def test_train(self):
        """Training on the whole training set should finish without raising."""
        self.svm.train(self.training_classes)

    def test_predict(self):
        """Every training sample should be predicted as its own class."""
        # Train here instead of relying on test_train having run first:
        # unittest-style collection sorts test methods alphabetically, which
        # places test_predict ahead of test_train.
        self.svm.train(self.training_classes)

        errors = 0
        total = 0
        for class_name, class_samples in self.training_classes.items():
            for sample in class_samples:
                total += 1
                if self.svm.predict(sample) != class_name:
                    # wrong prediction
                    errors += 1

        # just to see the idea
        print('errors:', errors, ' total:', total)
        assert errors == 0

    def test_cross_validate(self):
        """Run ``cross_validate`` with 10 folds over the training classes."""
        # 10 folds validation
        res = self.svm.cross_validate(10, self.training_classes)
        # this just to get the idea
        # NOTE(review): presumably `res` is an error measure, so 0 means a
        # perfect score - confirm against OAOSVM.cross_validate.
        assert res == 0
class TestGroup(TestCase):
    """Placeholder test case; only loads the satimage training sample.

    Loading and splitting happen while the class body executes, so the
    data is available as class attributes.
    """

    training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/satimage/sat-train-s.csv'
    training_set = Dataset.load(training_file)
    training_classes = Dataset.split(training_set)
    class_cnt = len(training_classes.keys())

    def test___init__(self):
        """No checks are implemented yet; this test always passes."""
        return None
class TestSimMultiSVM(TestCase):
    """Tests for ``SimMultiSVM`` using the satimage training sample.

    The tests share one classifier instance (a class attribute) and are
    chained with ``pytest.mark.run(after=...)`` markers, so later tests see
    the state produced by earlier ones.
    """

    training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/satimage/sat-train-s.csv'
    training_set = Dataset.load(training_file)
    training_classes = Dataset.split(training_set)
    class_cnt = len(training_classes.keys())

    gamma = 0.1
    svm = SimMultiSVM(gamma=gamma)

    def test__find_separability(self):
        """Check the shapes of the separability matrix and label mappings."""
        (self.svm.separability,
         self.svm.label_to_int,
         self.svm.int_to_label) = self.svm._find_separability(
            self.training_classes)

        # One entry per ordered pair of classes, i.e. a square matrix.
        assert self.svm.separability.size == self.class_cnt * self.class_cnt
        assert self.svm.separability[0].size == self.class_cnt

        # This training file is expected to yield 6 distinct labels.
        assert len(self.svm.label_to_int.keys()) == 6

        # The two mappings must be inverses of each other.
        for idx, val in enumerate(self.svm.int_to_label):
            assert self.svm.label_to_int[val] == idx

    # The marker must name an existing test method; it previously pointed at
    # 'test__find_similarity', which does not exist in this class.
    @pytest.mark.run(after='test__find_separability')
    def test_Train(self):
        """Train, then check each internal tree node has one SVM per child."""
        self.svm.train(self.training_classes)

        def runner(current):
            # Nodes without children are leaves: nothing to verify.
            if current.children is None:
                return
            assert len(current.svms) == len(current.children)
            for child in current.children:
                runner(child)

        runner(self.svm.tree.root)

    # The training test is spelled 'test_Train' (capital T) in this class.
    @pytest.mark.run(after='test_Train')
    def test_predict(self):
        """Every training sample should be predicted as its own class."""
        errors = 0
        total = 0
        for class_name, class_samples in self.training_classes.items():
            for sample in class_samples:
                total += 1
                if self.svm.predict(sample) != class_name:
                    # wrong prediction
                    errors += 1

        # just to see the idea
        print('errors:', errors, ' total:', total)
        assert errors == 0

    @pytest.mark.run(after='test_predict')
    def test_cross_validate(self):
        """Run ``cross_validate`` with 10 folds over the training classes."""
        # 10 folds validation
        res = self.svm.cross_validate(10, self.training_classes)
        # this just to get the idea
        assert res == 0
class TestDataset(TestCase):
    """Checks ``Dataset.load`` and ``Dataset.split`` against the iris CSV.

    The file is loaded and split once, while the class body executes.
    """

    file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/iris.csv'
    dataset = Dataset.load(file)
    splitted = Dataset.split(dataset)

    def test_Load(self):
        """The first loaded feature row should hold 4 values."""
        first_row = self.dataset.features[0]
        assert len(first_row) == 4

    def test_Split(self):
        """Splitting should give 3 groups that together cover every row."""
        assert len(self.splitted.keys()) == 3

        sum_splitted = 0
        for members in self.splitted.values():
            sum_splitted += len(members)
            # every member keeps all 4 feature values
            for each in members:
                assert len(each) == 4

        # no sample may be lost or duplicated by the split
        assert sum_splitted == len(self.dataset.features)
print('creating svm and testing with supplied test data') num_workers = multiprocessing.cpu_count() print('workers: ', num_workers) training_files = [ ('satimage', 'satimage/sat-train-s.csv', 'satimage/sat-test.csv'), ] for training in training_files: project_name = training[0] print('working on project: ', project_name) # load dataset training_file = training[1] training_set = Dataset.load(training_file) training_classes = Dataset.split(training_set) testing_file = training[2] testing_set = Dataset.load(testing_file) testing_classes = Dataset.split(testing_set) best = {} for each in ( ('OAO', OAOSVM), ('SimBinarySVM', SimBinarySVM), ('SimMultiSVM', SimMultiSVM), ): svm_type = each[0]
('letter', 'datasets/letter/letter-train.txt', 'datasets/letter/letter-test.txt', lambda row: (row[1:], row[0])), ] for training in training_files: project_name = training[0] print('working on project: ', project_name) # load dataset given_adapter = None if len(training) > 3: given_adapter = training[3] training_file = training[1] print('train: ', training_file) training_set = Dataset.load(training_file, adapter=given_adapter) training_classes = Dataset.split(training_set) testing_file = training[2] print('test: ', testing_file) testing_set = Dataset.load(testing_file, adapter=given_adapter) testing_classes = Dataset.split(testing_set) best = {} avg = {} for each in ( ('OAO', OAOSVM), ('OAA', OAASVM), ('SimMultiSVM', SimMultiSVM), # ('SimBinarySVM_ORI', SimBinarySVMORI),
class TestSimBinarySVM(TestCase):
    """Step-by-step tests for ``SimBinarySVM`` on the satimage sample.

    One classifier instance is shared as a class attribute; the tests are
    chained with ``pytest.mark.run(after=...)`` so that each step can reuse
    the attributes stored on ``self.svm`` by the previous step.
    """

    training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/satimage/sat-train-s.csv'
    training_set = Dataset.load(training_file)
    training_classes = Dataset.split(training_set)
    class_cnt = len(training_classes.keys())

    gamma = 1e-6
    C = 0.01
    svm = SimBinarySVM(gamma=gamma, C=C)

    def test_find_separability(self):
        """Store the separability matrix and label mappings, then check them."""
        found = self.svm._find_separability(self.training_classes)
        (self.svm.separability,
         self.svm.label_to_int,
         self.svm.int_to_label) = found

        # square matrix: class_cnt rows of class_cnt entries
        assert self.svm.separability.size == self.class_cnt * self.class_cnt
        assert self.svm.separability[0].size == self.class_cnt

        assert len(self.svm.label_to_int.keys()) == 6

        # label -> int must invert int -> label
        for position, label in enumerate(self.svm.int_to_label):
            assert self.svm.label_to_int[label] == position

    @pytest.mark.run(after='test_find_separability')
    def test_construct_mst_graph(self):
        """Build the MST and check its edge count and connectivity."""
        built = self.svm._construct_mst_graph(
            self.training_classes, self.svm.separability)
        (self.svm.mst_graph, self.svm.mst_list) = built

        assert len(self.svm.mst_list) == self.class_cnt - 1
        assert len(self.svm.mst_graph.connected_with(0)) == self.class_cnt

        # count every finite entry of the connection matrix
        cnt = sum(
            1
            for row in self.svm.mst_graph.connection
            for dist in row
            if dist != float('inf')
        )
        # the graph bidirectional
        assert cnt == (self.class_cnt - 1) * 2

    @pytest.mark.run(after='test_construct_mst_graph')
    def test_construct_tree(self):
        """Build the tree; each internal node's values must equal its children's."""
        self.svm.tree = self.svm._construct_tree(
            self.svm.mst_graph, self.svm.mst_list)

        def check_node(node):
            left, right = node.left, node.right
            if left is not None or right is not None:
                assert len(node.val) == len(left.val) + len(right.val)
                assert set(node.val) == set(left.val + right.val)
                check_node(left)
                check_node(right)

        check_node(self.svm.tree.root)

    @pytest.mark.run(after='test_construct_tree')
    def test_train(self):
        """Train, then check every internal tree node carries an SVM."""
        self.svm.train(self.training_classes)

        def check_node(node):
            if node.left is not None or node.right is not None:
                assert node.svm
                check_node(node.left)
                check_node(node.right)

        check_node(self.svm.tree.root)

    @pytest.mark.run(after='test_train')
    def test_predict(self):
        """Every training sample should be predicted as its own class."""
        errors = 0
        total = 0
        for label, samples in self.training_classes.items():
            for sample in samples:
                total += 1
                predicted = self.svm.predict(sample)
                if predicted != label:
                    # wrong prediction
                    errors += 1

        # just to see the idea
        print('errors:', errors, ' total:', total)
        assert errors == 0

    @pytest.mark.run(after='test_predict')
    def test_cross_validate(self):
        """Run ``cross_validate`` with 10 folds over the training classes."""
        # 10 folds validation
        res = self.svm.cross_validate(10, self.training_classes)
        # this just to get the idea
        assert res == 0

    def test_make_gram_matrix(self):
        """Compare ``make_gram_matrix``'s kernel with a direct RBF formula."""
        gamma = 0.1
        vectors = []
        indexed_classes = {}
        next_idx = 0
        for name, points in self.training_classes.items():
            indexed_rows = []
            for point in points:
                features = point.tolist()
                vectors.append(features)
                # give it an index: the index goes in front of the features
                indexed_rows.append([next_idx] + features)
                next_idx += 1
            indexed_classes[name] = numpy.array(indexed_rows)

        vectors = numpy.array(vectors)
        kernel = self.svm.make_gram_matrix(vectors, gamma)

        def original_kernel(a, b):
            import numpy
            return numpy.exp(-gamma * numpy.linalg.norm(a - b)**2)

        for samples in indexed_classes.values():
            # pair each sample with a randomly chosen sample of the same class
            shuffled = samples[:].tolist()
            random.shuffle(shuffled)
            shuffled = numpy.array(shuffled)
            for row, other in zip(samples, shuffled):
                # the reference kernel only sees the features (index dropped)
                expected = original_kernel(row[1:], other[1:])
                assert abs(kernel(row, other) - expected) < 1e-5
class TestSimBinarySVMORI(TestCase):
    """Tests for ``SimBinarySVMORI`` using the satimage training sample.

    The dataset and one classifier instance are created while the class body
    executes and shared by all tests. Tests that need a trained model train
    it themselves, so each one can also run on its own.
    """

    training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/satimage/sat-train-s.csv'
    training_set = Dataset.load(training_file)
    training_classes = Dataset.split(training_set)
    class_cnt = len(training_classes.keys())

    gamma = 1e-6
    C = 0.01
    svm = SimBinarySVMORI(gamma=gamma, C=C)

    def test_create_mapping(self):
        """Creating the label <-> int mappings should not raise."""
        self.label_to_int, self.int_to_label = self.svm._create_mapping(
            self.training_classes)

    @pytest.mark.run(after='test_create_mapping')
    def test_create_tree(self):
        """Each group's universe must be the union of its children's universes."""
        self.label_to_int, self.int_to_label = self.svm._create_mapping(
            self.training_classes)
        self.group_mgr = self.svm._create_tree(self.training_classes,
                                               self.label_to_int)

        def runner(current):
            # Groups without children are leaves: nothing to verify.
            if current.children is None:
                return
            child_universe = []
            for child in current.children:
                child_universe += list(child.universe.keys())
            assert set(current.universe.keys()) == set(child_universe)
            for child in current.children:
                runner(child)

        # Start from the first group held by the manager.
        runner(next(iter(self.group_mgr.groups.values())))

    # The marker must name an existing test method; it previously pointed at
    # 'test_construct_tree', which does not exist in this class.
    @pytest.mark.run(after='test_create_tree')
    def test_train(self):
        """Train, then check every group that has children carries an SVM."""
        group_mgr = self.svm.train(self.training_classes)

        def runner(current):
            if current.children is None:
                return
            assert current.svm
            for child in current.children:
                runner(child)

        runner(next(iter(group_mgr.groups.values())))

    @pytest.mark.run(after='test_train')
    def test_predict(self):
        """Every training sample should be predicted as its own class."""
        # Train first so this test does not depend on test_train's state.
        self.svm.train(self.training_classes)

        errors = 0
        total = 0
        for class_name, class_samples in self.training_classes.items():
            for sample in class_samples:
                total += 1
                if self.svm.predict(sample) != class_name:
                    # wrong prediction
                    errors += 1

        # just to see the idea
        print('errors:', errors, ' total:', total)
        assert errors == 0

    @pytest.mark.run(after='test_predict')
    def test_cross_validate(self):
        """Run ``cross_validate`` with 10 folds over the training classes."""
        self.svm.train(self.training_classes)
        # 10 folds validation
        res = self.svm.cross_validate(10, self.training_classes)
        # this just to get the idea
        assert res == 0

    def test_make_gram_matrix(self):
        """Compare ``make_gram_matrix``'s kernel with a direct RBF formula."""
        gamma = 0.1
        vectors = []
        training_classes_with_idx = {}
        idx = 0
        for name, points in self.training_classes.items():
            this_class = []
            for point in points:
                # give it an index: the index goes in front of the features
                vector = point.tolist()
                vectors.append(vector)
                this_class.append([idx] + vector)
                idx += 1
            training_classes_with_idx[name] = numpy.array(this_class)

        vectors = numpy.array(vectors)
        kernel = self.svm.make_gram_matrix(vectors, gamma)

        def original_kernel(a, b):
            import numpy
            return numpy.exp(-gamma * numpy.linalg.norm(a - b)**2)

        for samples in training_classes_with_idx.values():
            # Pair each sample with a randomly chosen sample of the same class.
            a = samples
            b = a[:].tolist()
            random.shuffle(b)
            b = numpy.array(b)
            for i in range(a.shape[0]):
                # The reference kernel only sees the features (index dropped).
                assert abs(
                    kernel(a[i], b[i]) -
                    original_kernel(a[i][1:], b[i][1:])) < 1e-5
import time
from treesvm.dataset import Dataset
from treesvm import SimBinarySVM

__author__ = 'phizaz'


def timer(func):
    """Call ``func()`` and return the process time it took.

    The measurement is the difference between two ``time.process_time()``
    readings; whatever ``func`` returns is discarded.
    """
    began = time.process_time()
    func()
    finished = time.process_time()
    return finished - began


# ('letter', 'datasets/letter/letter-train.txt', 'datasets/letter/letter-test.txt', lambda row: (row[1:], row[0]))
training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/treesvm/datasets/letter/letter-train.txt'
# training_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/simbinarysvm/generated/generated.csv'
training_set = Dataset.load(
    training_file,
    # the adapter returns (everything after the first column, first column)
    adapter=lambda row: (row[1:], row[0]),
)
training_classes = Dataset.split(training_set)

testing_file = '/Users/phizaz/Dropbox/waseda-internship/svm-implementations/treesvm/datasets/letter/letter-test.txt'
testing_set = Dataset.load(
    testing_file,
    adapter=lambda row: (row[1:], row[0]),
)
testing_classes = Dataset.split(testing_set)

svm = SimBinarySVM(gamma=0.001, C=10, verbose=True)


def train():
    """Train the module-level classifier on the training classes."""
    svm.train(training_classes)


print('training: %.4f' % timer(train))

# Filled in by test(); timer() throws away its callable's return value.
result = None


def test():
    """Run the classifier on the testing classes, keeping the outcome in ``result``."""
    global result
    result = svm.test(testing_classes)


print('testing: %.4f' % timer(test))
# ('pendigits', 'datasets/pendigits/pendigits.tra', 'datasets/pendigits/pendigits.tes', lambda row: (row[:-1], row[-1])), ("letter", "datasets/letter/letter-train.txt", "datasets/letter/letter-test.txt", lambda row: (row[1:], row[0])) ] for training in training_files: project_name = training[0] print("working on project: ", project_name) # load dataset given_adapter = None if len(training) > 3: given_adapter = training[3] training_file = training[1] print("train: ", training_file) training_set = Dataset.load(training_file, adapter=given_adapter) training_classes = Dataset.split(training_set) testing_file = training[2] print("test: ", testing_file) testing_set = Dataset.load(testing_file, adapter=given_adapter) testing_classes = Dataset.split(testing_set) best = {} avg = {} for each in ( ("OAO", OAOSVM), ("OAA", OAASVM), ("SimMultiSVM", SimMultiSVM), # ('SimBinarySVM_ORI', SimBinarySVMORI),