  def test_from_array_without_label(self):
    ds = Dataset.from_array(
      [[10, 20, 30], [20, 10, 50], [40, 10, 30]],  # data
      None,                                        # labels
      ['k1', 'k2', 'k3'],                          # feature_names
      ['pos', 'neg'],                              # label_names
    )

    expected_labels = [None, None, None]
    expected_k1s = [10, 20, 40]

    actual_labels = []
    actual_k1s = []
    for (idx, (label, d)) in ds:
      actual_labels.append(label)
      actual_k1s.append(dict(d.num_values)['k1'])

    self.assertEqual(expected_labels, actual_labels)
    self.assertEqual(expected_k1s, actual_k1s)
  def test_from_array(self):
    ds = Dataset.from_array(
      [[10, 20, 30], [20, 10, 50], [40, 10, 30]],  # data
      [0, 1, 0],                                   # labels
      ['k1', 'k2', 'k3'],                          # feature_names
      ['pos', 'neg'],                              # label_names
    )

    expected_labels = ['pos', 'neg', 'pos']
    expected_k1s = [10, 20, 40]

    actual_labels = []
    actual_k1s = []
    for (idx, (label, d)) in ds:
      actual_labels.append(label)
      actual_k1s.append(dict(d.num_values)['k1'])

    self.assertEqual(expected_labels, actual_labels)
    self.assertEqual(expected_k1s, actual_k1s)
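# The two tests above pin down the `Dataset.from_array` contract: integer
# labels are mapped through `label_names` (or kept as None when no labels are
# given), and each data row becomes a Datum whose numeric features are exposed
# via `num_values`. The standalone sketch below simply replays that contract
# outside the test harness; it uses only calls shown in the tests, and the
# printed values restate the assertions above (numeric values may come back
# as floats).
from jubakit.classifier import Dataset

ds = Dataset.from_array(
  [[10, 20, 30], [20, 10, 50], [40, 10, 30]],  # data
  [0, 1, 0],                                   # labels
  ['k1', 'k2', 'k3'],                          # feature_names
  ['pos', 'neg'],                              # label_names
)
for (idx, (label, d)) in ds:
  print(idx, label, dict(d.num_values))  # -> labels 'pos'/'neg'/'pos', k1 = 10/20/40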
from jubakit.classifier import Classifier, Dataset, Config

# Switch the StratifiedKFold API depending on the scikit-learn version.
sklearn_version = int(sklearn.__version__.split('.')[1])
if sklearn_version < 18:
  from sklearn.cross_validation import StratifiedKFold
else:
  from sklearn.model_selection import StratifiedKFold

# Load the built-in `iris` dataset from scikit-learn.
iris = sklearn.datasets.load_iris()

# Convert it into a jubakit Dataset.
#dataset = Dataset.from_array(iris.data, iris.target)
# ... or, optionally, you can assign feature/label names to improve human-readability.
dataset = Dataset.from_array(iris.data, iris.target, iris.feature_names, iris.target_names)

# Shuffle the dataset, as it is sorted by label.
dataset = dataset.shuffle()

# Create a Classifier Service.
# The classifier process starts using a default configuration.
classifier = Classifier.run(Config())

# Prepare arrays to keep true/predicted labels to display a report later.
true_labels = []
predicted_labels = []

# Run stratified K-fold validation.
labels = list(dataset.get_labels())
if sklearn_version < 18:
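  # --- The example is cut off at the version check above. Below is a hedged
  # sketch of how the stratified K-fold loop could continue. The old/new
  # scikit-learn split mirrors the imports above; `classifier.clear()`,
  # `classifier.classify()` yielding (idx, label, result) tuples with the
  # best-scoring label in result[0][0], and indexing the Dataset with index
  # arrays are assumptions made for illustration, not verbatim source.
  train_test_indices = StratifiedKFold(labels, n_folds=10)
else:
  skf = StratifiedKFold(n_splits=10)
  train_test_indices = skf.split(labels, labels)

for (train_idx, test_idx) in train_test_indices:
  # Reset the model so that folds do not leak into each other.
  classifier.clear()

  # Split the dataset into train/test subsets for this fold.
  (train_ds, test_ds) = (dataset[train_idx], dataset[test_idx])

  # Train the classifier on this fold.
  for _ in classifier.train(train_ds):
    pass

  # Classify the held-out records and record true/predicted labels.
  for (idx, label, result) in classifier.classify(test_ds):
    true_labels.append(label)
    predicted_labels.append(result[0][0])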
  n_redundant=2,
  n_repeated=0,
  n_classes=2,
  n_clusters_per_class=2,
  weights=None,
  flip_y=0.01,
  class_sep=1.0,
  hypercube=True,
  shift=0.0,
  scale=1.0,
  shuffle=True,
  random_state=0,  # fixed seed
)

# Convert the arrays into a jubakit Dataset.
dataset = Dataset.from_array(X, y)

# Try finding the best classifier parameter.
param2metrics = {}
for method in ['AROW', 'NHERD', 'CW']:
  for rw in [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0]:
    print('Running ({0} / regularization_weight = {1})...'.format(method, rw))

    # Create a config data structure.
    jubatus_config = Config(method=method, parameter={'regularization_weight': rw})

    # It is equivalent to:
    #jubatus_config = Config.default()
    #jubatus_config['method'] = method
    #jubatus_config['parameter']['regularization_weight'] = rw
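    # --- The example is cut off here; below is a hedged sketch of how the
    # parameter sweep could be scored. Training and classifying on the same
    # generated dataset is an assumption for illustration, as are
    # `classifier.classify()` yielding (idx, label, result) tuples with the
    # best-scoring label in result[0][0] and `classifier.stop()` shutting the
    # service down between runs.
    classifier = Classifier.run(jubatus_config)

    # Train on the generated dataset.
    for _ in classifier.train(dataset):
      pass

    # Classify it back and record a simple accuracy for this parameter set.
    (y_true, y_pred) = ([], [])
    for (idx, label, result) in classifier.classify(dataset):
      y_true.append(label)
      y_pred.append(result[0][0])
    correct = sum(1 for (t, p) in zip(y_true, y_pred) if t == p)
    param2metrics['{0}/{1}'.format(method, rw)] = float(correct) / len(y_true)

    # Stop this classifier process before trying the next parameter set.
    classifier.stop()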
===================================================

In this example we show classification using the Digits dataset.
"""

import sklearn.datasets
import sklearn.metrics

import jubakit
from jubakit.classifier import Classifier, Dataset, Config

# Load the digits dataset.
digits = sklearn.datasets.load_digits()

# Create a Dataset.
dataset = Dataset.from_array(digits.data, digits.target)
n_samples = len(dataset)
n_train_samples = int(n_samples / 2)

# Create a Classifier Service.
cfg = Config(method='AROW', parameter={'regularization_weight': 0.1})
classifier = Classifier.run(cfg)
print("Started Service: {0}".format(classifier))

# Train the classifier using the first half of the dataset.
train_ds = dataset[:n_train_samples]
print("Training...: {0}".format(train_ds))
for _ in classifier.train(train_ds):
  pass

# Test the classifier using the last half of the dataset.
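# --- The example is cut off here; a hedged sketch of the test phase follows.
# `classifier.classify()` yielding (idx, label, result) tuples with the
# best-scoring label in result[0][0], and `classifier.stop()`, are assumptions
# for illustration; `sklearn.metrics.classification_report` is used because
# `sklearn.metrics` is already imported above.
test_ds = dataset[n_train_samples:]
print("Testing...: {0}".format(test_ds))

(y_true, y_pred) = ([], [])
for (idx, label, result) in classifier.classify(test_ds):
  y_true.append(label)         # true label carried by the dataset
  y_pred.append(result[0][0])  # assumed best-scoring predicted label

# Stop the classifier service and print a classification report.
classifier.stop()
print(sklearn.metrics.classification_report(y_true, y_pred))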
le.fit(labels)
c = le.transform(y)

# Scale the dataset to (mean, variance) = (0, 1).
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Calculate the domain of the plot.
X_min = X.min(axis=0)
#X_min = np.ones(X.shape[1])
X_max = X.max(axis=0)
X0, X1 = np.meshgrid(np.linspace(X_min[0], X_max[0], meshsize),
                     np.linspace(X_min[1], X_max[1], meshsize))

# Make the training dataset.
dataset = Dataset.from_array(X, y)
# Make a mesh dataset to plot the decision surface.
contourf_dataset = Dataset.from_array(np.c_[X0.ravel(), X1.ravel()])

# Set up and run Jubatus.
config = Config(method=method,
                parameter={'regularization_weight': regularization_weight})
classifier = Classifier.run(config, port=port)

# Construct classifier prediction models and dump the model weights.
for i, _ in enumerate(classifier.train(dataset)):
  model_name = 'decision_surface_{}'.format(i)
  classifier.save(name=model_name)

# Prepare the figure.
fig, ax = plt.subplots()
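# --- The example is cut off here; below is a hedged sketch of how the decision
# surface could be drawn from the pieces prepared above. Classifying
# `contourf_dataset` and reshaping the predictions back onto the (X0, X1) mesh
# is an assumption for illustration, as is `classifier.classify()` yielding
# (idx, label, result) tuples whose best-scoring label (result[0][0]) is the
# string form of the numeric class.
Z = np.zeros(X0.size)
for (idx, _, result) in classifier.classify(contourf_dataset):
  Z[int(idx)] = float(result[0][0])  # assumed numeric-like predicted label
Z = Z.reshape(X0.shape)

# Shade the decision regions and overlay the (scaled) training points.
ax.contourf(X0, X1, Z, alpha=0.4)
ax.scatter(X[:, 0], X[:, 1], c=c, edgecolors='k')
ax.set_title('{0} (regularization_weight = {1})'.format(method, regularization_weight))
plt.show()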