Example #1
def demo(output_file=None, instances=50000):
    """ _test_sam_knn_prequential

    This demo shows how to produce a prequential evaluation.

    The first thing needed is a stream. In this case we use a file stream,
    which reads its samples from the movingSquares.csv file inside the
    datasets folder.

    Then we need to set up a classifier, which in this case is an instance
    of scikit-multiflow's SAMKNN. Optionally, a pipeline structure can then
    be created, initialized with that classifier.

    The evaluation is then run.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Setup the File Stream
    # opt = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV", False)
    opt = FileOption("FILE", "OPT_NAME", "../datasets/movingSquares.csv",
                     "CSV", False)
    stream = FileStream(opt, -1, 1)
    # stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the classifier
    # classifier = SGDClassifier()
    # classifier = KNNAdwin(k=8, max_window_size=2000,leaf_size=40, categorical_list=None)
    # classifier = OzaBaggingAdwin(h=KNN(k=8, max_window_size=2000, leaf_size=30, categorical_list=None))
    classifier = SAMKNN(n_neighbors=5,
                        knnWeights='distance',
                        maxSize=1000,
                        STMSizeAdaption='maxACCApprox',
                        useLTM=False)
    # classifier = SGDRegressor()
    # classifier = PerceptronMask()

    # Setup the pipeline
    #pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    eval = EvaluatePrequential(pretrain_size=0,
                               max_instances=instances,
                               batch_size=1,
                               n_wait=100,
                               max_time=1000,
                               output_file=output_file,
                               task_type='classification',
                               show_plot=True,
                               plot_options=['performance'])

    # Evaluate
    eval.eval(stream=stream, classifier=classifier)
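
A demo like this is usually run by calling the function directly; the output file name below is just an illustration, not something prescribed by the example:

# Run the SAM-kNN prequential demo and log results to a CSV file (illustrative call)
demo(output_file='sam_knn_results.csv', instances=20000)
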
Example #2
def demo_parameterized(h, dset="sea_stream.csv", show_plot=True):
    """ Run a prequential evaluation of the given classifier h on a dataset file. """
    # Setup Stream
    opt = FileOption("FILE", "OPT_NAME", "../datasets/"+dset, "CSV", False)
    stream = FileStream(opt, -1, 1)
    stream.prepare_for_use()

    # For each classifier, e...
    T_init = 100
    eval = EvaluatePrequential(pretrain_size=T_init,
                               output_file='output.csv',
                               max_instances=10000,
                               batch_size=1,
                               n_wait=1000,
                               task_type='classification',
                               show_plot=show_plot,
                               plot_options=['performance'])
    eval.eval(stream=stream, classifier=h)
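
Since demo_parameterized takes the learner as an argument, it can be reused to try different classifiers on the same file; the calls below are illustrative and assume HoeffdingTree and SAMKNN are imported as in the surrounding examples:

# Evaluate two different learners on the same dataset file (illustrative calls)
demo_parameterized(HoeffdingTree(), dset="sea_stream.csv")
demo_parameterized(SAMKNN(n_neighbors=5, maxSize=1000), dset="sea_stream.csv", show_plot=False)
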
Example #3
def demo(output_file=None, instances=40000):
    """ _test_prequential_mol

    This demo shows the evaluation process of a MultiOutputLearner (MOL)
    classifier, initialized with sklearn's SGDClassifier as its base estimator.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Setup the File Stream
    #opt = FileOption("FILE", "OPT_NAME", "../datasets/music.csv", "CSV", False)
    #stream = FileStream(opt, 0, 6)
    stream = MultilabelGenerator(n_samples=instances)
    #stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the classifier
    classifier = MultiOutputLearner(SGDClassifier(n_iter=100))
    #classifier = SGDClassifier()
    #classifier = PassiveAggressiveClassifier()
    #classifier = SGDRegressor()
    #classifier = PerceptronMask()

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    eval = EvaluatePrequential(
        pretrain_size=5000,
        max_instances=instances - 10000,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        task_type='multi_output',
        show_plot=True,
        plot_options=['hamming_score', 'j_index', 'exact_match'])

    # Evaluate
    eval.eval(stream=stream, classifier=pipe)
Example #4
def demo(output_file=None, instances=40000):
    """ _test_prequential_bagging
    
    This demo shows the evaluation process of a LeverageBagging classifier, 
    initialized with KNN classifiers.
    
    Parameters
    ----------
    output_file: string
        The name of the csv output file
    
    instances: int
        The evaluation's max number of instances
    
    """
    # Setup the File Stream
    # opt = FileOption("FILE", "OPT_NAME", "../datasets/sea_big.csv", "CSV", False)
    # stream = FileStream(opt, -1, 1)
    stream = SEAGenerator(classification_function=2,
                          instance_seed=755437,
                          noise_percentage=0.0)
    stream.prepare_for_use()

    # Setup the classifier
    #classifier = OzaBaggingAdwin(h=KNN(k=8, max_window_size=2000, leaf_size=30, categorical_list=None))
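    # With ensemble_length=1 the ensemble holds a single KNN member;
    # increase it to obtain an actual bagging ensemble.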
    classifier = LeverageBagging(h=KNN(k=8, max_window_size=2000,
                                       leaf_size=30),
                                 ensemble_length=1)

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    eval = EvaluatePrequential(
        pretrain_size=2000,
        max_instances=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        task_type='classification',
        show_plot=True,
        plot_options=['kappa', 'kappa_t', 'performance'])

    # Evaluate
    eval.eval(stream=stream, classifier=pipe)
Example #5
def demo(output_file=None, instances=40000):
    """ _test_regression

    This demo shows how to evaluate a regressor. The data stream used is an
    instance of RegressionGenerator, which feeds data to an instance of
    sklearn's SGDRegressor.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Setup the File Stream
    #opt = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV", False)
    #stream = FileStream(opt, -1, 1)
    #stream = WaveformGenerator()
    #stream.prepare_for_use()
    stream = RegressionGenerator(n_samples=instances)
    # Setup the classifier
    #classifier = SGDClassifier()
    #classifier = PassiveAggressiveClassifier()
    classifier = SGDRegressor()
    #classifier = PerceptronMask()

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    eval = EvaluatePrequential(pretrain_size=1,
                               max_instances=instances,
                               batch_size=1,
                               n_wait=1,
                               max_time=1000,
                               output_file=output_file,
                               task_type='regression',
                               show_plot=True,
                               plot_options=['true_vs_predicts'])

    # Evaluate
    eval.eval(stream=stream, classifier=pipe)
Example #6
def demo():

    # The classifier we will use (other options: SAMKNN, LeverageBagging, SGD)
    h = HoeffdingTree()

    # Setup Stream
    opt = FileOption("FILE", "OPT_NAME", "../datasets/sea_stream.csv", "CSV",
                     False)
    stream = FileStream(opt, -1, 1)
    stream.prepare_for_use()

    T_init = 100
    eval = EvaluatePrequential(pretrain_size=T_init,
                               output_file='output.csv',
                               max_instances=10000,
                               batch_size=1,
                               n_wait=1000,
                               task_type='classification',
                               show_plot=True,
                               plot_options=['performance'])
    eval.eval(stream=stream, classifier=h)
Example #7
def demo():
    """ _test_pipeline
    
    This demo shows the Pipeline structure seamlessly working as a
    learner while being passed as a parameter to an EvaluatePrequential
    object.
     
    """
    # # Setup the stream
    # opt = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV", False)
    # stream = FileStream(opt, -1, 1)
    # stream.prepare_for_use()
    # # If used for Hoeffding Trees then need to pass indices for Nominal attributes

    # Test with RandomTreeGenerator
    # stream = RandomTreeGenerator(n_classes=2, n_numerical_attributes=5)
    # stream.prepare_for_use()

    # Test with WaveformGenerator
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the classifier
    #classifier = PerceptronMask()
    #classifier = NaiveBayes()
    #classifier = PassiveAggressiveClassifier()
    classifier = HoeffdingTree()

    # Setup the pipeline
    pipe = Pipeline([('Hoeffding Tree', classifier)])

    # Setup the evaluator
    eval = EvaluatePrequential(show_plot=True,
                               pretrain_size=1000,
                               max_instances=100000)

    # Evaluate
    eval.eval(stream=stream, classifier=pipe)
Example #8
from skmultiflow.options.file_option import FileOption
from skmultiflow.data.file_stream import FileStream
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential

from my_classifier import BatchClassifier

dataset = "elec"

# 1. Create a stream
opt = FileOption("FILE", "OPT_NAME", "./data/" + dataset + ".csv", "CSV",
                 False)
stream = FileStream(opt, -1, 1)
# 2. Prepare for use
stream.prepare_for_use()
# 3. Instantiate the classifiers to compare
h = [
    KNN(k=10, max_window_size=100, leaf_size=30),
    HoeffdingTree(),
    BatchClassifier(window_size=100, max_models=10),
]
# 4. Setup the evaluator
eval = EvaluatePrequential(pretrain_size=1000,
                           output_file='result_' + dataset + '.csv',
                           max_instances=10000,
                           batch_size=1,
                           n_wait=500,
                           max_time=1000000000,
                           task_type='classification',
                           show_plot=True,
                           plot_options=['performance'])
# 5. Run the evaluation
eval.eval(stream=stream, classifier=h)
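
The BatchClassifier used above comes from the author's own my_classifier module, not from scikit-multiflow. A minimal sketch of what such a wrapper might look like, assuming a scikit-learn batch estimator refit on a sliding window and the partial_fit/predict interface the evaluator expects, is:

import numpy as np
from sklearn.naive_bayes import GaussianNB

class BatchClassifier:
    """ Hypothetical wrapper: refit a batch learner on a sliding window of samples. """

    def __init__(self, window_size=100, max_models=10):
        self.window_size = window_size
        self.max_models = max_models      # kept for API compatibility; unused in this sketch
        self.X_window = []
        self.y_window = []
        self.model = GaussianNB()

    def partial_fit(self, X, y, classes=None):
        # Keep only the most recent window_size samples
        self.X_window.extend(np.asarray(X).tolist())
        self.y_window.extend(np.asarray(y).tolist())
        self.X_window = self.X_window[-self.window_size:]
        self.y_window = self.y_window[-self.window_size:]
        # Refit the batch model on the current window
        self.model.fit(np.asarray(self.X_window), np.asarray(self.y_window))
        return self

    def predict(self, X):
        return self.model.predict(np.asarray(X))
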
Example #9
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential

# 1. Create a stream
stream = WaveformGenerator()
stream.prepare_for_use()

# 2. Instantiate the classifier
adf = AdaptiveRandomForest()

# 3. Setup the evaluator
eval = EvaluatePrequential(show_plot=True,
                           pretrain_size=100,
                           max_instances=10000)

# 4. Run evaluation
eval.eval(stream=stream, classifier=adf)

# Eval Prequential with datasets.csv for ARF

from skmultiflow.options.file_option import FileOption
from skmultiflow.data.file_stream import FileStream
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential

# 1. Create a stream
#options = FileOption(option_value="../datasets/covtype.csv", file_extension="CSV")
#options = FileOption(option_value="../datasets/movingSquares.csv", file_extension="CSV")
options = FileOption(option_value="../datasets/sea_stream.csv",
                     file_extension="CSV")
Example #10
def demo(instances=2000):
    """ _test_comparison_prequential
    
    This demo will test a prequential evaluation when more than one learner is 
    passed, which makes it a comparison task.
    
    Parameters
    ----------
    instances: int
        The evaluation's maximum number of instances.
     
    """
    # Stream setup
    opt = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV",
                     False)
    #opt = FileOption("FILE", "OPT_NAME", "../datasets/sea_big.csv", "CSV", False)
    stream = FileStream(opt, -1, 1)
    #stream = SEAGenerator(classification_function=2, instance_seed=53432, balance_classes=False)
    stream.prepare_for_use()
    # Setup the classifier
    clf = SGDClassifier()
    # classifier = KNNAdwin(k=8, max_window_size=2000,leaf_size=40, categorical_list=None)
    # classifier = OzaBaggingAdwin(h=KNN(k=8, max_window_size=2000, leaf_size=30, categorical_list=None))
    clf_one = KNNAdwin(k=8, max_window_size=1000, leaf_size=30)
    #clf_two = KNN(k=8, max_window_size=1000, leaf_size=30)
    #clf_two = LeverageBagging(h=KNN(), ensemble_length=2)

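    # In the covtype data the categorical attributes are one-hot encoded:
    # columns 10-13 hold the wilderness-area flags and columns 14-53 the
    # soil-type flags, so they are folded back into two categorical features here.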
    t_one = OneHotToCategorical([[10, 11, 12, 13],
                                 [14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                                  24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
                                  34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
                                  44, 45, 46, 47, 48, 49, 50, 51, 52, 53]])
    #t_two = OneHotToCategorical([[10, 11, 12, 13],
    #                        [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
    #                        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53]])

    pipe_one = Pipeline([('one_hot_to_categorical', t_one), ('KNN', clf_one)])
    #pipe_two = Pipeline([('one_hot_to_categorical', t_two), ('KNN', clf_two)])

    classifier = [clf, pipe_one]
    # classifier = SGDRegressor()
    # classifier = PerceptronMask()

    # Setup the pipeline
    #pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    eval = EvaluatePrequential(pretrain_size=2000,
                               output_file='teste.csv',
                               max_instances=instances,
                               batch_size=1,
                               n_wait=200,
                               max_time=1000,
                               task_type='classification',
                               show_plot=True,
                               plot_options=['performance', 'kappa_t'])

    # Evaluate
    eval.eval(stream=stream, classifier=classifier)