def function(params):
    """
    Function to be optimized.
    """
    # Generate a Jubatus configuration from the given hyperparameters.
    config = jubatus_config(params)

    # Create a Classifier service.
    classifier = Classifier.run(config)

    # Scoring metric (accuracy by default).
    metric = accuracy_score

    # Calculate the cross-validation score.
    score = cv_score(classifier, dataset, metric=metric)

    # Stop the classifier.
    classifier.stop()

    # Print the score and hyperparameters.
    print_log(score, params)

    # hyperopt only minimizes the target function, so negate the accuracy
    # score to turn the maximization into a minimization.
    return -1.0 * score
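# A minimal driver for the objective above might look like the following
# sketch. It uses hyperopt's `fmin`/`tpe`/`hp` API; the search space below is
# illustrative and must match whatever `jubatus_config` actually expects.
from hyperopt import fmin, tpe, hp

space = {
    'method': hp.choice('method', ['AROW', 'NHERD', 'CW']),
    'regularization_weight': hp.loguniform('regularization_weight', -5, 2),
}

# Minimize the negated accuracy returned by `function`.
best = fmin(fn=function, space=space, algo=tpe.suggest, max_evals=50)
print(best)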
# Load the built-in `iris` dataset from scikit-learn.
iris = sklearn.datasets.load_iris()

# Convert it into a jubakit Dataset.
#dataset = Dataset.from_array(iris.data, iris.target)
# ... or, optionally, you can assign feature/label names to improve
# human-readability.
dataset = Dataset.from_array(iris.data, iris.target,
                             iris.feature_names, iris.target_names)

# Shuffle the dataset, as it is sorted by label.
dataset = dataset.shuffle()

# Create a Classifier service.
# The classifier process starts using a default configuration.
classifier = Classifier.run(Config())

# Prepare arrays to keep true/predicted labels to display a report later.
true_labels = []
predicted_labels = []

# Run stratified K-fold cross-validation.
labels = list(dataset.get_labels())
if sklearn_version < 18:
    train_test_indices = StratifiedKFold(labels, n_folds=10)
else:
    skf = StratifiedKFold(n_splits=10)
    train_test_indices = skf.split(labels, labels)

for train_idx, test_idx in train_test_indices:
    # Clear the classifier (call the `clear` RPC).
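    # (Sketch: the snippet is truncated above. The continuation below is an
    # assumption based on the train/classify pattern used in the other
    # examples; only names already defined above are used.)
    classifier.clear()

    # Split the dataset into train/test folds for this iteration.
    (train_ds, test_ds) = (dataset[train_idx], dataset[test_idx])

    # Train with the training fold.
    for (idx, label) in classifier.train(train_ds):
        pass

    # Classify the test fold and record true/predicted labels.
    for (idx, label, result) in classifier.classify(test_ds):
        true_labels.append(label)
        predicted_labels.append(result[0][0])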
# Load LIBSVM files.
# Note that these example files are not included in this repository.
# You can fetch them from:
# https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#news20
print("Loading LIBSVM files...")
(train_X, train_y, test_X, test_y) = load_svmlight_files(['news20', 'news20.t'])

# Create a train Dataset.
print("Creating train dataset...")
train_ds = Dataset.from_matrix(train_X, train_y)

# Create a test Dataset.
print("Creating test dataset...")
test_ds = Dataset.from_matrix(test_X, test_y)

# Create a Classifier service.
classifier = Classifier.run(Config())

# Train the classifier.
print("Training...")
for (idx, _) in classifier.train(train_ds):
    if idx % 1000 == 0:
        print("Training... ({0} %)".format(100 * idx / len(train_ds)))

# Test the classifier.
print("Testing...")
y_true = []
y_pred = []
for (idx, label, result) in classifier.classify(test_ds):
    y_true.append(label)
    y_pred.append(result[0][0])
    if idx % 1000 == 0:
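        # (Sketch: the snippet is truncated above. A progress print, followed
        # by a scikit-learn report once the loop finishes, is an assumed
        # continuation; `classification_report` is not imported in the
        # original snippet.)
        print("Testing... ({0} %)".format(100 * idx / len(test_ds)))

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))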
# Load a CSV file.
loader = CSVLoader('iris.csv')

# Define a Schema that defines the type of each column in the CSV file.
schema = Schema({
    'Species': Schema.LABEL,
}, Schema.NUMBER)

# Display the Schema.
print('Schema: {0}'.format(schema))

# Create a Dataset.
dataset = Dataset(loader, schema).shuffle()
n_samples = len(dataset)
n_train_samples = int(n_samples / 2)

# Create a Classifier configuration.
cfg = Config()

# Bulk train-test the classifier.
result = Classifier.train_and_classify(
    cfg,
    dataset[:n_train_samples],
    dataset[n_train_samples:],
    sklearn.metrics.classification_report
)

print('---- Classification Report -----------------------------------')
print(result)
    'Sepal.Length': Schema.NUMBER,
    'Sepal.Width': Schema.NUMBER,
    'Petal.Length': Schema.NUMBER,
    'Petal.Width': Schema.NUMBER,
})

# Create a Dataset, which is an abstract representation of a set of data
# that can be fed to Services like Classifier. `shuffle()` returns a new
# Dataset whose order of data is shuffled. Note that Datasets are immutable
# objects.
dataset = Dataset(loader, schema).shuffle()

# Create a Classifier service.
# The classifier process starts using a default configuration.
cfg = Config.default()
classifier = Classifier.run(cfg)

# You can also connect to an existing service instead.
#classifier = Classifier('127.0.0.1', 9199)

# Train the classifier with every record in the dataset.
for (idx, label) in classifier.train(dataset):
    # You can peek at the datum being trained.
    print("Train: {0}".format(dataset[idx]))

# Save the trained model to a file.
print("Saving model file...")
classifier.save('example_snapshot')

# Classify using the same dataset.
for (idx, label, result) in classifier.classify(dataset):
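    # (Sketch: the snippet is truncated above. `result` is a list of
    # (label, score) pairs ordered by score, so printing the top prediction
    # against the true label is an assumed continuation.)
    print("Classify: true = {0}, predicted = {1} (score: {2})".format(
        label, result[0][0], result[0][1]))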
def setUp(self):
    self._service = Classifier.run(Config())
def setUp(self):
    self._service = Classifier.run(Config())
    self._sh = self._service._shell()
def test_simple(self):
    classifier = Classifier()
# Try to find the best classifier parameters.
param2metrics = {}
for method in ['AROW', 'NHERD', 'CW']:
    for rw in [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0]:
        print('Running ({0} / regularization_weight = {1})...'.format(method, rw))

        # Create a config data structure.
        jubatus_config = Config(method=method,
                                parameter={'regularization_weight': rw})

        # This is equivalent to:
        #jubatus_config = Config.default()
        #jubatus_config['method'] = method
        #jubatus_config['parameter']['regularization_weight'] = rw

        # Launch a Jubatus server using the specified configuration.
        classifier = Classifier.run(jubatus_config)

        # Train with the dataset.
        for _ in classifier.train(dataset):
            pass

        # Classify with the same dataset.
        y_true = []
        y_pred = []
        for (idx, label, result) in classifier.classify(dataset):
            y_true.append(label)
            y_pred.append(result[0][0])

        classifier.stop()

        # Store the metrics for the current configuration.
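        # (Sketch: the snippet is truncated above. Recording a scikit-learn
        # metric keyed by the parameter combination is an assumed
        # continuation; the choice of `f1_score` is an assumption.)
        param2metrics['{0} ({1})'.format(method, rw)] = \
            sklearn.metrics.f1_score(y_true, y_pred, average='weighted')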
def test_embedded(self):
    classifier = Classifier.run(Config(), embedded=True)
# Calculate the domain.
X_min = X.min(axis=0)
#X_min = np.ones(X.shape[1])
X_max = X.max(axis=0)
X0, X1 = np.meshgrid(np.linspace(X_min[0], X_max[0], meshsize),
                     np.linspace(X_min[1], X_max[1], meshsize))

# Make the training dataset.
dataset = Dataset.from_array(X, y)

# Make a mesh dataset to plot the decision surface.
contourf_dataset = Dataset.from_array(np.c_[X0.ravel(), X1.ravel()])

# Set up and run Jubatus.
config = Config(method=method,
                parameter={'regularization_weight': regularization_weight})
classifier = Classifier.run(config, port=port)

# Construct classifier prediction models and dump the model weights.
for i, _ in enumerate(classifier.train(dataset)):
    model_name = 'decision_surface_{}'.format(i)
    classifier.save(name=model_name)

# Prepare the figure.
fig, ax = plt.subplots()

def draw_decision_surface(i):
    midx = int(i / 2)
    sidx = int(i / 2) + (i % 2)

    # Load the Jubatus prediction model.
    model_name = 'decision_surface_{}'.format(midx)
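    # (Sketch: the snippet is truncated above. Loading the saved snapshot and
    # drawing predictions over the mesh is an assumed continuation; the
    # `load` call and the contour-plot details are assumptions, and the
    # labels returned by `classify` are assumed to be numeric strings.)
    classifier.load(name=model_name)
    Z = np.array([float(result[0][0]) for (_, _, result)
                  in classifier.classify(contourf_dataset)])
    ax.clear()
    ax.contourf(X0, X1, Z.reshape(X0.shape), alpha=0.4)
    ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k')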
from jubakit.classifier import Classifier, Dataset, Config
from jubakit.model import JubaDump

# Load the digits dataset.
digits = load_digits()

# Create a dataset.
dataset = Dataset.from_array(digits.data, digits.target)
n_samples = len(dataset)
n_train_samples = int(n_samples * 0.7)
train_ds = dataset[:n_train_samples]
test_ds = dataset[n_train_samples:]

# Create a classifier.
config = Config(method='AROW', parameter={'regularization_weight': 0.1})
classifier = Classifier.run(config)
model_name = 'classifier_digits'
model_path = '/tmp/{}_{}_classifier_{}.jubatus'.format(
    classifier._host, classifier._port, model_name)

# Show the feature weights of the target label.
target_label = 4

# Initialize the summary writer.
writer = SummaryWriter()

# Train and test the classifier.
epochs = 100
for epoch in range(epochs):
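    # (Sketch: the snippet is truncated above. A per-epoch train/test pass
    # that logs accuracy to TensorBoard is an assumed continuation; the
    # metric and the tag name are assumptions.)
    for _ in classifier.train(train_ds):
        pass

    y_true, y_pred = [], []
    for (_, label, result) in classifier.classify(test_ds):
        y_true.append(label)
        y_pred.append(result[0][0])

    # Compute the accuracy without extra imports.
    correct = sum(1 for t, p in zip(y_true, y_pred) if t == p)
    accuracy = float(correct) / len(y_true)

    # `add_scalar` is provided by both the tensorboardX and the
    # torch.utils.tensorboard SummaryWriter implementations.
    writer.add_scalar('metrics/accuracy', accuracy, epoch)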