Example #1
def function(params):
    """
    Function to be optimized.
    """
    # generate config
    config = jubatus_config(params)
    # create a classifier service.
    classifier = Classifier.run(config)
    # scoring metric (default accuracy metric)
    metric = accuracy_score
    # calculate cross-validation score
    score = cv_score(classifier, dataset, metric=metric)
    # stop the classifier
    classifier.stop()
    # print score and hyperparameters
    print_log(score, params)
    # hyperopt can only minimize the target function, so negate the accuracy score.
    return -1.0 * score
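
A minimal usage sketch (not part of the original snippet): the objective above can be handed to hyperopt's fmin. The `space` below is an assumed search space for the classifier's hyperparameters.

from hyperopt import fmin, tpe, hp

space = {
    'method': hp.choice('method', ['AROW', 'NHERD', 'CW']),
    'regularization_weight': hp.loguniform('regularization_weight', -9, 2),
}
best = fmin(fn=function, space=space, algo=tpe.suggest, max_evals=50)
print(best)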
Example #2
# Load built-in `iris` dataset from scikit-learn.
iris = sklearn.datasets.load_iris()

# Convert it into jubakit Dataset.
#dataset = Dataset.from_array(iris.data, iris.target)
# ... or, optionally, you can assign feature/label names to improve human-readability.
dataset = Dataset.from_array(iris.data, iris.target, iris.feature_names,
                             iris.target_names)

# Shuffle the dataset, as the dataset is sorted by label.
dataset = dataset.shuffle()

# Create a Classifier Service.
# Classifier process starts using a default configuration.
classifier = Classifier.run(Config())

# Prepare arrays to keep true/predicted labels to display a report later.
true_labels = []
predicted_labels = []

# Run stratified K-fold validation.
labels = list(dataset.get_labels())
if sklearn_version < 18:  # scikit-learn older than 0.18 (legacy cross_validation API)
    train_test_indices = StratifiedKFold(labels, n_folds=10)
else:
    skf = StratifiedKFold(n_splits=10)
    train_test_indices = skf.split(labels, labels)

for train_idx, test_idx in train_test_indices:
    # Clear the classifier (call `clear` RPC) so each fold starts from a fresh model.
    classifier.clear()
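    # A hedged sketch of the remainder of each fold (the snippet is truncated
    # here): split the dataset by the fold indices, train, then collect the
    # true/predicted labels declared above for the report.
    (train_ds, test_ds) = (dataset[train_idx], dataset[test_idx])
    for _ in classifier.train(train_ds):
        pass
    for (_, label, result) in classifier.classify(test_ds):
        true_labels.append(label)
        predicted_labels.append(result[0][0])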
Example #3
# Imports assumed by this snippet:
from sklearn.datasets import load_svmlight_files
from jubakit.classifier import Classifier, Dataset, Config
# Load LIBSVM files.
# Note that these example files are not included in this repository.
# You can fetch them from: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#news20
print("Loading LIBSVM files...")
(train_X, train_y, test_X, test_y) = load_svmlight_files(['news20', 'news20.t'])

# Create a Train Dataset.
print("Creating train dataset...")
train_ds = Dataset.from_matrix(train_X, train_y)

# Create a Test Dataset.
print("Creating test dataset...")
test_ds = Dataset.from_matrix(test_X, test_y)

# Create a Classifier Service
classifier = Classifier.run(Config())

# Train the classifier.
print("Training...")
for (idx, _) in classifier.train(train_ds):
  if idx % 1000 == 0:
    print("Training... ({0} %)".format(100 * idx / len(train_ds)))

# Test the classifier.
print("Testing...")
y_true = []
y_pred = []
for (idx, label, result) in classifier.classify(test_ds):
  y_true.append(label)
  y_pred.append(result[0][0])
  if idx % 1000 == 0:
    # (body reconstructed to mirror the training progress message)
    print("Testing... ({0} %)".format(100 * idx // len(test_ds)))
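
# A hedged follow-up (not in the original snippet): summarize the results
# with scikit-learn's classification_report.
from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))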
Example #4
# Load a CSV file.
loader = CSVLoader('iris.csv')

# Define a Schema that maps each column of the CSV file to a type.
# 'Species' is the label; every other column defaults to Schema.NUMBER.
schema = Schema({
  'Species': Schema.LABEL,
}, Schema.NUMBER)

# Display Schema
print('Schema: {0}'.format(schema))

# Create a Dataset.
dataset = Dataset(loader, schema).shuffle()
n_samples = len(dataset)
n_train_samples = int(n_samples / 2)

# Create a Classifier configuration.
cfg = Config()

# Bulk train-test the classifier.
result = Classifier.train_and_classify(
  cfg,
  dataset[:n_train_samples],
  dataset[n_train_samples:],
  sklearn.metrics.classification_report
)

print('---- Classification Report -----------------------------------')
print(result)
Example #5
# The opening of this snippet is truncated; the lines below (loading the CSV
# and beginning the Schema definition) are reconstructed after Example #4.
loader = CSVLoader('iris.csv')
schema = Schema({
  'Species': Schema.LABEL,
  'Sepal.Length': Schema.NUMBER,
  'Sepal.Width': Schema.NUMBER,
  'Petal.Length': Schema.NUMBER,
  'Petal.Width': Schema.NUMBER,
})

# Create a Dataset, which is an abstract representation of a set of data
# that can be fed to Services like Classifier.  `shuffle()` returns a new
# Dataset whose order of data is shuffled.  Note that datasets are immutable
# objects.
dataset = Dataset(loader, schema).shuffle()

# Create a Classifier Service.
# Classifier process starts using a default configuration.
cfg = Config.default()
classifier = Classifier.run(cfg)

# You can also connect to an existing service instead.
#classifier = Classifier('127.0.0.1', 9199)

# Train the classifier with every data in the dataset.
for (idx, label) in classifier.train(dataset):
  # You can peek the datum being trained.
  print("Train: {0}".format(dataset[idx]))

# Save the trained model file.
print("Saving model file...")
classifier.save('example_snapshot')

# Classify using the same dataset.
for (idx, label, result) in classifier.classify(dataset):
  # `result` is a list of (label, score) pairs sorted by score; the loop body
  # is reconstructed here to report the top prediction.
  (pred_label, pred_score) = result[0]
  print("Classify: true = {0}, predicted = {1}".format(label, pred_label))
Example #6
def setUp(self):
    self._service = Classifier.run(Config())
Example #7
def setUp(self):
    self._service = Classifier.run(Config())
    self._sh = self._service._shell()
Example #9
def test_simple(self):
    classifier = Classifier()
Example #10
# Try finding the best classifier parameter.
param2metrics = {}
for method in ['AROW', 'NHERD', 'CW']:
  for rw in [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0]:
    print('Running ({0} / regularization_weight = {1})...'.format(method, rw))

    # Create a config data structure.
    jubatus_config = Config(method=method, parameter={'regularization_weight': rw})

    # It is equivalent to:
    #jubatus_config = Config.default()
    #jubatus_config['method'] = method
    #jubatus_config['parameter']['regularization_weight'] = rw

    # Launch Jubatus server using the specified configuration.
    classifier = Classifier.run(jubatus_config)

    # Train with the dataset.
    for _ in classifier.train(dataset):
      pass

    # Classify with the same dataset.
    y_true = []
    y_pred = []
    for (idx, label, result) in classifier.classify(dataset):
      y_true.append(label)
      y_pred.append(result[0][0])

    classifier.stop()

    # Store the metric for the current configuration. This line is a hedged
    # completion of the truncated snippet; accuracy via
    # sklearn.metrics.accuracy_score is an assumption.
    param2metrics['{0} ({1})'.format(method, rw)] = accuracy_score(y_true, y_pred)
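
# A hedged follow-up (not in the original snippet): report the best-scoring
# configuration collected above.
best = max(param2metrics, key=param2metrics.get)
print('Best configuration: {0} (score = {1:.4f})'.format(best, param2metrics[best]))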
Example #12
def test_embedded(self):
    classifier = Classifier.run(Config(), embedded=True)
Example #13
# (The top of this snippet is truncated: X, y, method, regularization_weight,
# port and meshsize are defined earlier in the original script.)
# calculate the domain
X_min = X.min(axis=0)
#X_min = np.ones(X.shape[1])
X_max = X.max(axis=0)
X0, X1 = np.meshgrid(np.linspace(X_min[0], X_max[0], meshsize),
                     np.linspace(X_min[1], X_max[1], meshsize))

# make training dataset
dataset = Dataset.from_array(X, y)
# make mesh dataset to plot decision surface
contourf_dataset = Dataset.from_array(np.c_[X0.ravel(), X1.ravel()])

# setup and run jubatus
config = Config(method=method,
                parameter={'regularization_weight': regularization_weight})
classifier = Classifier.run(config, port=port)

# construct classifier prediction models and dump model weights
for i, _ in enumerate(classifier.train(dataset)):
    model_name = 'decision_surface_{}'.format(i)
    classifier.save(name=model_name)

# prepare figure
fig, ax = plt.subplots()


def draw_decision_surface(i):
    midx = i // 2
    sidx = i // 2 + (i % 2)
    # load jubatus prediction model
    model_name = 'decision_surface_{}'.format(midx)
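    # A hedged sketch of the rest of this function (the snippet is truncated):
    # restore the snapshot saved during training, classify the mesh dataset,
    # and draw the decision surface. `classifier.load` mirrors the `save`
    # call above; the label-to-float conversion assumes numeric class labels.
    classifier.load(name=model_name)
    labels = [result[0][0]
              for (_, _, result) in classifier.classify(contourf_dataset)]
    Z = np.array(labels, dtype=float).reshape(X0.shape)
    ax.clear()
    ax.contourf(X0, X1, Z, alpha=0.4)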
Example #14
# Imports assumed by this snippet (SummaryWriter is taken to be tensorboardX's):
from sklearn.datasets import load_digits
from tensorboardX import SummaryWriter
from jubakit.classifier import Classifier, Dataset, Config
from jubakit.model import JubaDump

# Load the digits dataset.
digits = load_digits()

# Create a dataset.
dataset = Dataset.from_array(digits.data, digits.target)
n_samples = len(dataset)
n_train_samples = int(n_samples * 0.7)
train_ds = dataset[:n_train_samples]
test_ds = dataset[n_train_samples:]

# Create a classifier.
config = Config(method='AROW',
                parameter={'regularization_weight': 0.1})
classifier = Classifier.run(config)

model_name = 'classifier_digits'
model_path = '/tmp/{}_{}_classifier_{}.jubatus'.format(
    classifier._host, classifier._port, model_name)

# show the feature weights of the target label.
target_label = 4

# Initialize summary writer.
writer = SummaryWriter()

# train and test the classifier.
epochs = 100
for epoch in range(epochs):
    # train (minimal completion of the truncated snippet: one pass over the
    # training split)
    for _ in classifier.train(train_ds):
        pass
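    # A hedged sketch of the rest of the loop: evaluate on the test split,
    # log accuracy to TensorBoard, then save and dump the model to inspect
    # the feature weights of `target_label`. `JubaDump.dump_file` is assumed
    # to be available (it shells out to the jubadump command).
    y_true, y_pred = [], []
    for (_, label, result) in classifier.classify(test_ds):
        y_true.append(label)
        y_pred.append(result[0][0])
    accuracy = sum(t == p for t, p in zip(y_true, y_pred)) / float(len(y_true))
    writer.add_scalar('classifier/accuracy', accuracy, epoch)
    classifier.save(model_name)
    dumped = JubaDump.dump_file(model_path)  # structure depends on jubadump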