def test_from_matrix(self):
    # Build a dataset from the fixture matrix, supplying explicit labels,
    # feature names and label names.
    ds = Dataset.from_matrix(
        self._create_matrix(),  # data
        [0, 1, 0],              # labels
        ['k1', 'k2', 'k3'],     # feature_names
        ['pos', 'neg'],         # label_names
    )

    # Walk the dataset once, keeping each record's label together with its
    # numeric values converted to a dict for keyed lookup.  The record index
    # yielded by the dataset is not needed here.
    records = [(label, dict(d.num_values)) for (_, (label, d)) in ds]

    # Label indices 0/1 are expected to come back as their label names.
    self.assertEqual(
        ['pos', 'neg', 'pos'],
        [label for (label, _) in records],
    )

    # A feature that is missing from a record's numeric values shows up as
    # None (the second record is expected to have no 'k1' entry).
    self.assertEqual(
        [1, None, 4],
        [nums.get('k1') for (_, nums) in records],
    )
    self.assertEqual(
        [2, 3, 6],
        [nums.get('k3') for (_, nums) in records],
    )
def test_from_matrix(self):
    # Dataset under test: fixture matrix plus explicit labels and names.
    dataset = Dataset.from_matrix(
        self._create_matrix(),  # data
        [0, 1, 0],              # labels
        ['k1', 'k2', 'k3'],     # feature_names
        ['pos', 'neg'],         # label_names
    )

    want_labels = ['pos', 'neg', 'pos']
    want_k1 = [1, None, 4]
    want_k3 = [2, 3, 6]

    got_labels, got_k1, got_k3 = [], [], []
    for _, (label, datum) in dataset:
        # Convert the record's numeric values to a dict once so both
        # features can be looked up by name; absent features yield None.
        numeric = dict(datum.num_values)
        got_labels.append(label)
        got_k1.append(numeric.get('k1'))
        got_k3.append(numeric.get('k3'))

    self.assertEqual(want_labels, got_labels)
    self.assertEqual(want_k1, got_k1)
    self.assertEqual(want_k3, got_k3)
from sklearn.datasets import load_svmlight_files
import sklearn.metrics

import jubakit
from jubakit.classifier import Classifier, Dataset, Config

# Read the train/test splits in LIBSVM format.
# The example files are not shipped with this repository; download them from:
# https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#news20
print("Loading LIBSVM files...")
train_X, train_y, test_X, test_y = load_svmlight_files(['news20', 'news20.t'])

# Wrap the training matrix and its labels in a Dataset.
print("Creating train dataset...")
train_ds = Dataset.from_matrix(train_X, train_y)

# Wrap the test matrix and its labels in a Dataset.
print("Creating test dataset...")
test_ds = Dataset.from_matrix(test_X, test_y)

# Start a classifier service with the default configuration.
classifier = Classifier.run(Config())

# Feed the training dataset to the classifier, reporting progress every
# 1000 records.
print("Training...")
for idx, _ in classifier.train(train_ds):
    if idx % 1000 != 0:
        continue
    percent_done = 100 * idx / len(train_ds)
    print("Training... ({0} %)".format(percent_done))

# Test the classifier.