Beispiel #1
0
  def test_simple(self):
    """transform() splits a record into its label and remaining features."""
    schema = Schema({
      'k1': Schema.STRING,
      'k2': Schema.LABEL,
    })

    # A record carrying a label yields it as the first element.
    label, datum = schema.transform({'k1': 'abc', 'k2': 'def'})
    self.assertEqual('def', label)
    self.assertEqual({'k1': 'abc'}, dict(datum.string_values))

    # A record whose label column is None is treated as unlabeled.
    label, datum = schema.transform({'k1': 'foo', 'k2': None})
    self.assertIsNone(label)
    self.assertEqual({'k1': 'foo'}, dict(datum.string_values))
Beispiel #2
0
  def test_simple(self):
    """transform() separates the LABEL column from the string features."""
    schema = Schema({
      'k1': Schema.STRING,
      'k2': Schema.LABEL,
    })

    # (input record, expected label, expected string features)
    cases = (
      ({'k1': 'abc', 'k2': 'def'}, 'def', {'k1': 'abc'}),
      ({'k1': 'foo', 'k2': None}, None, {'k1': 'foo'}),  # unlabeled data
    )
    for record, expected_label, expected_features in cases:
      label, d = schema.transform(record)
      self.assertEqual(expected_label, label)
      self.assertEqual(expected_features, dict(d.string_values))
Beispiel #3
0
    def test_invalid_get_labels(self):
        """get_labels() on a non-static Dataset must raise RuntimeError."""
        ds = Dataset(StubLoader(), Schema({'v': Schema.LABEL}), static=False)

        # get_labels returns a generator, which is only evaluated when it
        # is actually iterated — hand it to list() so the error fires.
        self.assertRaises(RuntimeError, list, ds.get_labels())
Beispiel #4
0
 def test_simple(self):
     """Iterating a Dataset yields (index, (label, datum)) tuples."""
     ds = Dataset(StubLoader(), Schema({'v': Schema.LABEL}))
     for idx, (label, d) in ds:
         # Labels are the 1-origin row numbers rendered as text.
         self.assertEqual(unicode_t(idx + 1), label)
         # The 'v' column became the label, so no feature values remain.
         for values in (d.string_values, d.num_values, d.binary_values):
             self.assertEqual(0, len(values))
     self.assertEqual(['1', '2', '3'], list(ds.get_labels()))
Beispiel #5
0
import sklearn.metrics

from jubakit.classifier import Classifier, Schema, Dataset, Config
from jubakit.loader.csv import CSVLoader
import jubakit.logger

# In this example, we enable logging mechanism to show you
# what's going on in jubakit.
jubakit.logger.setup_logger(jubakit.logger.INFO)

# Load a CSV file.  Each row of 'iris.csv' becomes one record.
loader = CSVLoader('iris.csv')

# Define a Schema that assigns a type to each column of the CSV file.
# 'Species' is the ground-truth label.
# NOTE(review): the second argument (Schema.NUMBER) appears to set the
# default type for columns not listed explicitly — confirm against
# jubakit's Schema documentation.
schema = Schema({
  'Species': Schema.LABEL,
}, Schema.NUMBER)

# Display the Schema for reference.
print('Schema: {0}'.format(schema))

# Create a Dataset and shuffle it so that the train/test split below is
# not biased by the row order of the CSV file.
dataset = Dataset(loader, schema).shuffle()
n_samples = len(dataset)
# First half of the shuffled records will be used for training.
n_train_samples = int(n_samples / 2)

# Create a Classifier configuration with default parameters.
cfg = Config()

# Bulk train-test the classifier.
result = Classifier.train_and_classify(
Beispiel #6
0
* How to load CSV files and convert it into Jubakit dataset.
* Training the classifier using the dataset.
* Getting classification result.
"""

from jubakit.classifier import Classifier, Schema, Dataset, Config
from jubakit.loader.csv import CSVLoader

# Load a CSV file.  Each row of 'iris.csv' becomes one record.
loader = CSVLoader('iris.csv')

# Define a Schema that assigns a type to each column of the CSV file:
# 'Species' is the ground-truth label and the four measurement columns
# are numeric features.
schema = Schema({
  'Species': Schema.LABEL,
  'Sepal.Length': Schema.NUMBER,
  'Sepal.Width': Schema.NUMBER,
  'Petal.Length': Schema.NUMBER,
  'Petal.Width': Schema.NUMBER,
})

# Create a Dataset, which is an abstract representation of a set of data
# that can be fed to Services like Classifier.  `shuffle()` returns a new
# Dataset whose order of data is shuffled.  Note that datasets are immutable
# objects.
dataset = Dataset(loader, schema).shuffle()

# Create a Classifier Service.
# The classifier process starts here, using the default configuration.
cfg = Config.default()
classifier = Classifier.run(cfg)
Beispiel #7
0
========================================

This is a famous `shogun` classifier example that predicts family name
of Shogun from his first name.
"""

from jubakit.classifier import Classifier, Schema, Dataset, Config
from jubakit.loader.csv import CSVLoader

# Load the shogun dataset; train and test sets live in separate CSV files.
train_loader = CSVLoader('shogun.train.csv')
test_loader = CSVLoader('shogun.test.csv')

# Define a Schema that assigns a type to each column of the CSV file:
# 'family_name' is the label to predict; 'first_name' is a string feature.
schema = Schema({
    'family_name': Schema.LABEL,
    'first_name': Schema.STRING,
})

# Create a Dataset.  Only the training set is shuffled; the test set is
# kept in file order.
train_dataset = Dataset(train_loader, schema).shuffle()
test_dataset = Dataset(test_loader, schema)

# Create a Classifier Service.
cfg = Config(method='PA',
             converter={
                 'string_rules': [{
                     'key': 'first_name',
                     'type': 'unigram',
                     'sample_weight': 'bin',
                     'global_weight': 'bin'
                 }]
Beispiel #8
0
 def test_get_labels(self):
     """get_labels() enumerates every label in loader order."""
     dataset = Dataset(StubLoader(), Schema({'v': Schema.LABEL}))
     labels = list(dataset.get_labels())
     self.assertEqual(['1', '2', '3'], labels)
Beispiel #9
0
 def test_without_label(self):
     """A Schema with no LABEL column must be constructible."""
     # Construction itself is the assertion: no exception means pass.
     Schema({'k1': Schema.STRING})
Beispiel #10
0
    # Creates a Twitter stream loader.
    # Fill in your keys here;  you can get keys at: https://apps.twitter.com/
    return TwitterStreamLoader(
        TwitterOAuthHandler(
            consumer_key='XXXXXXXXXXXXXXXXXXXX',
            consumer_secret='XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
            access_token='XXXXXXXX-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
            access_secret='XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
        ))


# Define a Schema.  Tweet records are addressed by dotted keys; the tweet
# language ('.lang') is the label, and the tweet text plus two user fields
# are string features.
# NOTE(review): the second argument (Schema.IGNORE) appears to be the
# default type for keys not listed here, i.e. they are discarded — confirm
# against jubakit's Schema documentation.
schema = Schema(
    {
        '.lang': Schema.LABEL,
        '.text': Schema.STRING,
        '.user.lang': Schema.STRING,
        '.user.description': Schema.STRING,
    }, Schema.IGNORE)

# Create a Classifier Service with the default configuration.
classifier = Classifier.run(Config())

# Number of tweets used for training.
n_train = 1000

print('---- Train: {0} tweets -------------------------------------'.format(
    n_train))

# Train the classifier using tweets from Twitter stream.
# Labels seen so far during training; filled in by the training loop below.
trained_labels = set()
Beispiel #11
0
 def test_predict(self):
     """predict() on this schema is expected to fail with RuntimeError."""
     schema = Schema({'k1': Schema.STRING, 'k2': Schema.LABEL})
     self.assertRaises(RuntimeError, schema.predict, {}, True)