Beispiel #1
0
  def test_from_matrix(self):
    # Build a dataset from a matrix, passing label indices together with
    # feature names and label names, then check what iteration yields.
    matrix = self._create_matrix()
    label_indices = [0, 1, 0]
    feature_names = ['k1', 'k2', 'k3']
    label_names = ['pos', 'neg']
    dataset = Dataset.from_matrix(matrix, label_indices, feature_names, label_names)

    # Collect one (label, k1, k3) row per record; a feature that is absent
    # from a record's numeric values is recorded as None.
    observed = []
    for (_, (label, datum)) in dataset:
      numeric = dict(datum.num_values)
      observed.append((label, numeric.get('k1'), numeric.get('k3')))

    # Label indices are expected to come back as their names.
    self.assertEqual(['pos', 'neg', 'pos'], [row[0] for row in observed])
    # The second record is expected to have no 'k1' value
    # (presumably a zero/missing entry in the matrix -- confirm in _create_matrix).
    self.assertEqual([1, None, 4], [row[1] for row in observed])
    self.assertEqual([2, 3, 6], [row[2] for row in observed])
Beispiel #2
0
    def test_from_matrix(self):
        # Exercise Dataset.from_matrix with explicit labels, feature names
        # and label names, then verify the records produced by iteration.
        dataset = Dataset.from_matrix(
            self._create_matrix(),
            [0, 1, 0],
            ['k1', 'k2', 'k3'],
            ['pos', 'neg'],
        )

        # Materialize each record as (label, {feature_name: numeric_value}).
        records = [
            (label, dict(datum.num_values))
            for (_, (label, datum)) in dataset
        ]
        seen_labels = [label for (label, _) in records]
        seen_k1 = [values.get('k1') for (_, values) in records]
        seen_k3 = [values.get('k3') for (_, values) in records]

        # Labels are expected as names rather than the indices passed in.
        self.assertEqual(['pos', 'neg', 'pos'], seen_labels)
        # A feature missing from a record shows up as None here.
        self.assertEqual([1, None, 4], seen_k1)
        self.assertEqual([2, 3, 6], seen_k3)
Beispiel #3
0
from sklearn.datasets import load_svmlight_files
import sklearn.metrics

import jubakit
from jubakit.classifier import Classifier, Dataset, Config

# Load the LIBSVM-format train ('news20') and test ('news20.t') files from
# the current working directory.
# Note that these example files are not included in this repository.
# You can fetch them from: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#news20
print("Loading LIBSVM files...")
(train_X, train_y, test_X, test_y) = load_svmlight_files(['news20', 'news20.t'])

# Create the training Dataset from the train matrix and its labels.
print("Creating train dataset...")
train_ds = Dataset.from_matrix(train_X, train_y)

# Create the test Dataset from the test matrix and its labels.
print("Creating test dataset...")
test_ds = Dataset.from_matrix(test_X, test_y)

# Create a Classifier service using a default-constructed Config.
classifier = Classifier.run(Config())

# Train the classifier, consuming the results that train() yields.
# Progress is printed every 1000th index as a percentage of the dataset size.
# NOTE(review): `/` is true division on Python 3, so the percentage prints
# as a float there; on Python 2 it is integer division.
print("Training...")
for (idx, _) in classifier.train(train_ds):
  if idx % 1000 == 0:
    print("Training... ({0} %)".format(100 * idx / len(train_ds)))

# Test the classifier.
Beispiel #4
0
from sklearn.datasets import load_svmlight_files
import sklearn.metrics

import jubakit
from jubakit.classifier import Classifier, Dataset, Config

# Load the LIBSVM-format train ('news20') and test ('news20.t') files from
# the current working directory.
# Note that these example files are not included in this repository.
# You can fetch them from: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#news20
print("Loading LIBSVM files...")
(train_X, train_y, test_X,
 test_y) = load_svmlight_files(['news20', 'news20.t'])

# Create the training Dataset from the train matrix and its labels.
print("Creating train dataset...")
train_ds = Dataset.from_matrix(train_X, train_y)

# Create the test Dataset from the test matrix and its labels.
print("Creating test dataset...")
test_ds = Dataset.from_matrix(test_X, test_y)

# Create a Classifier service using a default-constructed Config.
classifier = Classifier.run(Config())

# Train the classifier, consuming the results that train() yields.
# Progress is printed every 1000th index as a percentage of the dataset size.
# NOTE(review): `/` is true division on Python 3, so the percentage prints
# as a float there; on Python 2 it is integer division.
print("Training...")
for (idx, _) in classifier.train(train_ds):
    if idx % 1000 == 0:
        print("Training... ({0} %)".format(100 * idx / len(train_ds)))

# Test the classifier.