예제 #1
0
    def test_accuracy_stream(self):
        """Check ARSLVQ accuracy and kappa after a prequential run on SEA data.

        An ``ARSLVQ`` model is evaluated on a ``SEAGenerator`` stream seeded
        with ``random_state=42``. The test fails when the reported accuracy is
        below 0.84 or the reported kappa is below 0.68.
        """
        sea_stream = SEAGenerator(random_state=42)
        sea_stream.prepare_for_use()

        model = ARSLVQ(
            sigma=0.5,
            prototypes_per_class=2,
            batch_size=5,
            decay_rate=0.999,
        )

        prequential = EvaluatePrequential(
            show_plot=False,
            max_samples=20000,
            batch_size=5,
        )
        prequential.evaluate(sea_stream, model, model_names=['ARSLVQ'])

        # Take the first entry of the first measurement collection.
        first_collection = prequential.get_measurements()[0]
        measurements = np.asarray(first_collection)[0]

        accuracy_ok = measurements.get_accuracy() >= 0.84
        accuracy_msg = 'Accuracy was {} but has to be greater than 0.84'.format(
            measurements.get_accuracy())
        self.assertTrue(accuracy_ok, msg=accuracy_msg)

        kappa_ok = measurements.get_kappa() >= 0.68
        kappa_msg = 'Kappa was {} but has to be greater than 0.68'.format(
            measurements.get_kappa())
        self.assertTrue(kappa_ok, msg=kappa_msg)
예제 #2
0
def demo(output_file=None):
    """Evaluate an iSOUP-Tree multi-target regressor prequentially.

    A ``RegressionGenerator`` stream with 7 targets is fed to an
    ``iSOUPTreeRegressor`` using adaptive leaf prediction, and three averaged
    error metrics are tracked by ``EvaluatePrequential``.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    """
    data_stream = RegressionGenerator(
        n_samples=5000,
        n_features=20,
        n_informative=15,
        random_state=1,
        n_targets=7,
    )

    model = iSOUPTreeRegressor(leaf_prediction='adaptive')

    # Averaged error metrics tracked during the evaluation.
    tracked_metrics = [
        'average_mean_square_error',
        'average_mean_absolute_error',
        'average_root_mean_square_error',
    ]
    evaluator = EvaluatePrequential(
        pretrain_size=1,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=False,
        metrics=tracked_metrics,
    )

    evaluator.evaluate(stream=data_stream, model=model)
예제 #3
0
def demo(output_file=None, instances=50000):
    """Run a prequential evaluation of SAMKNN on the movingSquares file stream.

    The stream is built from ``../datasets/movingSquares.csv`` through a
    ``FileOption``. A ``SAMKNN`` classifier is then evaluated on it with
    ``EvaluatePrequential``, plotting the 'performance' option.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # File-backed stream.
    file_option = FileOption("FILE", "OPT_NAME",
                             "../datasets/movingSquares.csv", "CSV", False)
    data_stream = FileStream(file_option, -1, 1)
    data_stream.prepare_for_use()

    # Classifier under evaluation.
    sam_knn = SAMKNN(
        n_neighbors=5,
        knnWeights='distance',
        maxSize=1000,
        STMSizeAdaption='maxACCApprox',
        useLTM=False,
    )

    # Evaluator configured for a classification task.
    evaluator = EvaluatePrequential(
        pretrain_size=0,
        max_instances=instances,
        batch_size=1,
        n_wait=100,
        max_time=1000,
        output_file=output_file,
        task_type='classification',
        show_plot=True,
        plot_options=['performance'],
    )

    evaluator.eval(stream=data_stream, classifier=sam_knn)
예제 #4
0
    def grid_job(self, clf, stream):
        """Evaluate ``clf`` on ``stream`` repeatedly and average the results.

        Parameters for the classifier are obtained from
        ``self.search_best_parameters`` and applied with
        ``self.set_clf_params``. After ``self.chwd_root()`` is called, the
        working directory is changed to ``self.path`` below the current
        directory. The prequential evaluation is then run ``self.test_size``
        times; each run writes its metrics to a CSV file that is read back to
        derive additional statistics.

        Parameters
        ----------
        clf
            The classifier to configure and evaluate.
        stream
            The data stream. Its ``name`` (replaced by ``basename`` when
            ``name`` is ``None``) becomes part of the output file name.

        Returns
        -------
        list
            The classifier's class name followed by the element-wise mean of
            the per-run result rows.
        """
        params = self.search_best_parameters(clf)
        self.chwd_root()
        os.chdir(os.path.join(os.getcwd(), self.path))
        print(clf.__class__.__name__)
        clf = self.set_clf_params(clf, params, stream.name)
        local_result = []
        for _ in range(self.test_size):
            stream.prepare_for_use()
            stream.name = stream.basename if stream.name is None else stream.name
            path_to_save = clf.__class__.__name__ + \
                "_performance_on_"+stream.name+"_"+self.date+".csv"
            evaluator = EvaluatePrequential(
                show_plot=False,
                max_samples=self.max_samples,
                restart_stream=True,
                batch_size=10,
                metrics=self.metrics,
                output_file=path_to_save)
            evaluator.evaluate(stream=stream, model=clf)

            # Read back the metrics file the evaluator just wrote; lines
            # starting with '#' are skipped as comments.
            saved_metric = pd.read_csv(
                path_to_save, comment='#', header=0).astype(np.float32)
            # NOTE(review): columns 1 and 2 are presumably the per-sample
            # metric columns of the summary file -- confirm against its layout.
            saved_values = saved_metric.values[:, 1:3]
            saved_values.setflags(write=1)
            stds = np.std(saved_values, axis=0).tolist()
            sliding_mean = [np.mean(saved_metric.values[:, 2], axis=0)]

            # One row per buffered metric ("mean" values), plus a final row
            # holding the total running time of the first model.
            buffer_data = evaluator._data_buffer.data
            rows = [[m for m in buffer_data[n]["mean"]] for n in buffer_data]
            rows.append([evaluator.running_time_measurements[0]._total_time])
            output = np.array(rows).T.flatten().tolist()+sliding_mean+stds
            print(path_to_save+" "+str(output))
            local_result.append(output)

        clf_result = np.mean(local_result, axis=0).tolist()

        return [clf.__class__.__name__]+clf_result
def test_evaluate_prequential_classifier(tmpdir, test_path):
    """Check a prequential run of HoeffdingTree against a stored summary file.

    The evaluation writes ``prequential_summary.csv`` into ``tmpdir``; that
    file is compared with the file of the same name under ``test_path``.
    """
    # Seeded random-tree stream.
    generator_settings = dict(
        tree_random_state=23,
        sample_random_state=12,
        n_classes=4,
        n_cat_features=2,
        n_num_features=5,
        n_categories_per_cat_feature=5,
        max_tree_depth=6,
        min_leaf_depth=3,
        fraction_leaves_per_level=0.15,
    )
    stream = RandomTreeGenerator(**generator_settings)
    stream.prepare_for_use()

    # Learner: features from index 15 onwards are declared nominal.
    nominal_attr_idx = list(range(15, len(stream.feature_names)))
    learner = HoeffdingTree(nominal_attributes=nominal_attr_idx)

    # Evaluator writing its summary into the temporary directory.
    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    evaluator = EvaluatePrequential(
        max_samples=1000,
        metrics=['kappa', 'kappa_t', 'performance'],
        output_file=output_file,
    )

    # Run the evaluation and inspect the first returned model.
    trained_models = evaluator.evaluate(stream=stream, model=learner)
    result_learner = trained_models[0]

    assert isinstance(result_learner, HoeffdingTree)

    assert learner.get_model_measurements == result_learner.get_model_measurements

    expected_file = os.path.join(test_path, 'prequential_summary.csv')
    compare_files(output_file, expected_file)
예제 #6
0
def demo(input_file, output_file=None):
    """Evaluate a multi-target regression Hoeffding tree prequentially.

    The data comes from a ``RegressionGenerator`` with 7 targets. The
    regressor is wrapped in a single-step ``Pipeline`` before being passed to
    ``EvaluatePrequential``.

    Parameters
    ----------
    input_file: string
        A string describing the path for the input dataset. The body does not
        read this argument.

    output_file: string
        The name of the csv output file

    """
    data_stream = RegressionGenerator(
        n_samples=5000,
        n_features=20,
        n_informative=15,
        random_state=1,
        n_targets=7,
    )
    data_stream.prepare_for_use()

    regressor = MultiTargetRegressionHoeffdingTree(leaf_prediction='adaptive')

    # Single-step pipeline around the regressor.
    pipeline = Pipeline([('Classifier', regressor)])

    # Averaged error metrics tracked during the evaluation.
    tracked_metrics = [
        'average_mean_square_error',
        'average_mean_absolute_error',
        'average_root_mean_square_error',
    ]
    evaluator = EvaluatePrequential(
        pretrain_size=1,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=False,
        metrics=tracked_metrics,
    )

    evaluator.evaluate(stream=data_stream, model=pipeline)
def run_comparison(data, window=100, estimators=50,
                   anomaly=0.5, drift_rate=0.3, output_file='results'):
    """Compare HalfSpaceTrees and IsolationForestStream on the ``data`` stream.

    Both detectors are built with the same window size, number of estimators
    and anomaly threshold, then evaluated together with
    ``EvaluatePrequential`` under the names 'HSTrees' and 'iForestASD'.

    Parameters
    ----------
    data
        The stream to evaluate on. Its ``n_features`` attribute is read to
        configure ``HalfSpaceTrees``.
    window : int
        Passed as ``window_size`` to both models.
    estimators : int
        Passed as ``n_estimators`` to both models.
    anomaly : float
        Passed as ``anomaly_threshold`` to both models.
    drift_rate : float
        Passed as ``drift_threshold`` to ``IsolationForestStream``.
    output_file : str
        NOTE(review): currently unused; the evaluator always writes to
        'results_test.csv'. Confirm whether this should be wired through.

    Returns
    -------
    None
    """
    # `data` is the stream under evaluation; bind it to the name used below.
    stream = data

    models = [
        HalfSpaceTrees(n_features=stream.n_features,
                       window_size=window,
                       n_estimators=estimators,
                       size_limit=0.1 * 100,
                       anomaly_threshold=anomaly,
                       depth=15,
                       random_state=2),
        IsolationForestStream(window_size=window,
                              n_estimators=estimators,
                              anomaly_threshold=anomaly,
                              drift_threshold=drift_rate,
                              random_state=None),
    ]

    # Setup the evaluator
    evaluator = EvaluatePrequential(pretrain_size=1,
                                    max_samples=1000,
                                    show_plot=True,
                                    metrics=['accuracy', 'f1', 'kappa', 'kappa_m'],
                                    batch_size=1,
                                    output_file='results_test.csv')

    # Run the evaluation
    evaluator.evaluate(stream=stream, model=models,
                       model_names=['HSTrees', 'iForestASD'])
예제 #8
0
    def test_accuracy_stream(self):
        """Check GLVQ accuracy and kappa on a drifting SEA stream.

        Two ``SEAGenerator`` streams (the second using classification
        function 1) are combined in a ``ConceptDriftStream``. The test fails
        when the reported accuracy is below 0.93 or the reported kappa is
        below 0.84.
        """
        base_stream = SEAGenerator(random_state=112, noise_percentage=0.1)
        drifted_stream = SEAGenerator(random_state=112,
                                      classification_function=1,
                                      noise_percentage=0.1)
        drift_stream = ConceptDriftStream(stream=base_stream,
                                          drift_stream=drifted_stream,
                                          random_state=None,
                                          position=20000,
                                          width=50000)
        drift_stream.prepare_for_use()

        model = GLVQ(
            prototypes_per_class=6,
            beta=2,
            C=None,
            decay_rate=0.9,
            gradient_descent="SGD",
        )

        prequential = EvaluatePrequential(
            pretrain_size=1,
            show_plot=False,
            max_samples=20000,
            batch_size=1,
        )
        prequential.evaluate(drift_stream, model, model_names=['GLVQ'])

        # Take the first entry of the first measurement collection.
        first_collection = prequential.get_measurements()[0]
        measurements = np.asarray(first_collection)[0]

        accuracy_ok = measurements.get_accuracy() >= 0.93
        accuracy_msg = 'Accuracy was {} but has to be greater than 0.93'.format(
            measurements.get_accuracy())
        self.assertTrue(accuracy_ok, msg=accuracy_msg)

        kappa_ok = measurements.get_kappa() >= 0.84
        kappa_msg = 'Kappa was {} but has to be greater than 0.84'.format(
            measurements.get_kappa())
        self.assertTrue(kappa_ok, msg=kappa_msg)
def demo(instances=2000):
    """Compare two learners in a single prequential evaluation.

    An ``SGDClassifier`` and a pipeline made of ``OneHotToCategorical``
    followed by ``KNNAdwin`` are evaluated side by side on the stream read
    from ``../datasets/covtype.csv``.

    Parameters
    ----------
    instances: int
        The evaluation's maximum number of instances.

    """
    # Stream setup
    data_stream = FileStream("../datasets/covtype.csv", -1, 1)
    data_stream.prepare_for_use()

    # Learners
    sgd = SGDClassifier()
    knn_adwin = KNNAdwin(k=8, max_window_size=1000, leaf_size=30)

    # Index groups handed to the transform: 10..13 and 14..53 (inclusive).
    one_hot_groups = [list(range(10, 14)), list(range(14, 54))]
    to_categorical = OneHotToCategorical(one_hot_groups)

    knn_pipeline = Pipeline([('one_hot_to_categorical', to_categorical),
                             ('KNN', knn_adwin)])

    learners = [sgd, knn_pipeline]

    # Evaluator
    evaluator = EvaluatePrequential(
        pretrain_size=2000,
        output_file='teste.csv',
        max_samples=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        show_plot=True,
        metrics=['performance', 'kappa_t'],
    )

    evaluator.evaluate(stream=data_stream, model=learners)
def demo_parameterized(h, dset="sea_stream.csv", show_plot=True):
    """Evaluate classifier ``h`` prequentially on a dataset file.

    Parameters
    ----------
    h
        The classifier handed to the evaluator.
    dset: string
        File name appended to ``../datasets/`` to locate the data.
    show_plot: bool
        Forwarded to ``EvaluatePrequential``.

    """
    # Stream read from the chosen dataset file.
    file_option = FileOption("FILE", "OPT_NAME", "../datasets/"+dset,
                             "CSV", False)
    data_stream = FileStream(file_option, -1, 1)
    data_stream.prepare_for_use()

    # Number of samples used for pre-training.
    pretrain = 100
    evaluator = EvaluatePrequential(
        pretrain_size=pretrain,
        output_file='output.csv',
        max_instances=10000,
        batch_size=1,
        n_wait=1000,
        task_type='classification',
        show_plot=show_plot,
        plot_options=['performance'],
    )
    evaluator.eval(stream=data_stream, classifier=h)
예제 #11
0
def evaluate(stream, metrics, study_size):
    """Evaluate the classifiers from ``init_classifiers`` on ``stream``.

    Parameters
    ----------
    stream
        The data stream; its ``name`` prefixes the output file name.
    metrics
        Metrics forwarded to ``EvaluatePrequential``.
    study_size
        Forwarded as ``max_samples``.

    """
    models, model_labels = init_classifiers()
    stream.prepare_for_use()

    summary_path = stream.name + "_memory_other.csv"
    prequential = EvaluatePrequential(
        show_plot=False,
        batch_size=10,
        max_samples=study_size,
        metrics=metrics,
        output_file=summary_path,
    )

    prequential.evaluate(stream=stream, model=models, model_names=model_labels)
예제 #12
0
    def run_comparison(self,
                       stream,
                       stream_n_features,
                       window=100,
                       estimators=50,
                       anomaly=0.5,
                       drift_rate=0.3,
                       result_folder="Generated",
                       max_sample=100000,
                       n_wait=200,
                       metrics=[
                           'accuracy', 'f1', 'kappa', 'kappa_m',
                           'running_time', 'model_size'
                       ]):
        """Compare HalfSpaceTrees and IsolationForestStream on ``stream``.

        Both detectors are evaluated together with ``EvaluatePrequential``
        under the names 'HSTrees' and 'iForestASD'; results are written to a
        CSV file under ``results/<result_folder>``.

        Parameters
        ----------
        stream
            The stream handed to the evaluator.
        stream_n_features
            Passed as ``n_features`` to ``HalfSpaceTrees``.
        window : int
            Passed as ``window_size`` to both models; also part of the result
            file name.
        estimators : int
            Passed as ``n_estimators`` to both models; also part of the
            result file name.
        anomaly : float
            Passed as ``anomaly_threshold`` to both models.
        drift_rate : float
            Passed as ``drift_threshold`` to ``IsolationForestStream``.
        result_folder : str
            Sub-folder of ``results/`` that receives the result file.
        max_sample : int
            Passed as ``max_samples`` to the evaluator.
        n_wait : int
            Passed as ``n_wait`` to the evaluator.
        metrics : list
            Metrics forwarded to the evaluator. A copy is handed over so the
            shared default list cannot be aliased or altered across calls.

        Returns
        -------
        str
            The directory path that holds the result file.
        """
        from skmultiflow.anomaly_detection import HalfSpaceTrees
        from source.iforestasd_scikitmultiflow import IsolationForestStream
        from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential

        # Build the path of the result csv.
        directory_path = 'results/' + str(result_folder)
        # presumably ensures the directory exists -- verify in check_directory
        self.check_directory(path=directory_path)
        result_file_path = directory_path + '/result_for_WS' + str(
            window) + '_NE' + str(estimators) + '.csv'

        # The stream is used as passed in; ``prepare_for_use()`` is
        # intentionally not called here.

        # Hand the evaluator its own list; ``None`` is forwarded unchanged.
        evaluator_metrics = None if metrics is None else list(metrics)

        models = [
            HalfSpaceTrees(n_features=stream_n_features,
                           window_size=window,
                           n_estimators=estimators,
                           anomaly_threshold=anomaly),
            IsolationForestStream(window_size=window,
                                  n_estimators=estimators,
                                  anomaly_threshold=anomaly,
                                  drift_threshold=drift_rate)
        ]
        # Setup the evaluator
        evaluator = EvaluatePrequential(pretrain_size=1,
                                        max_samples=max_sample,
                                        show_plot=True,
                                        metrics=evaluator_metrics,
                                        batch_size=1,
                                        output_file=result_file_path,
                                        n_wait=n_wait)
        # Run the evaluation
        evaluator.evaluate(stream=stream,
                           model=models,
                           model_names=['HSTrees', 'iForestASD'])
        print("")
        print("Please find evaluation results here " + result_file_path)
        return directory_path
예제 #13
0
def demo(output_file=None, instances=40000):
    """Run a prequential evaluation of a PassiveAggressiveClassifier.

    The stream is read from ``../data/datasets/sea_big.csv``. The classifier
    is wrapped in a single-step ``Pipeline`` and evaluated with
    ``EvaluatePrequential`` on the 'kappa', 'kappa_t' and 'performance'
    metrics.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # File-backed stream.
    data_stream = FileStream("../data/datasets/sea_big.csv", -1, 1)
    data_stream.prepare_for_use()

    # Classifier wrapped in a single-step pipeline.
    passive_aggressive = PassiveAggressiveClassifier()
    pipeline = Pipeline([('Classifier', passive_aggressive)])

    # Evaluator.
    tracked_metrics = ['kappa', 'kappa_t', 'performance']
    evaluator = EvaluatePrequential(pretrain_size=200,
                                    max_samples=instances,
                                    batch_size=1,
                                    n_wait=100,
                                    max_time=1000,
                                    output_file=output_file,
                                    show_plot=True,
                                    metrics=tracked_metrics)

    evaluator.evaluate(stream=data_stream, model=pipeline)
예제 #14
0
파일: rrslvq.py 프로젝트: fmschleif/bix
    def test_stream(self):
        """Check that RRSLVQ reaches at least 0.5 accuracy on a noisy SEA stream.

        The model is evaluated prequentially on a ``SEAGenerator`` stream
        (classification function 2, 28% noise). The test fails when no
        measurements are available or the reported accuracy is below 0.5.
        """
        stream = SEAGenerator(classification_function=2,
                              random_state=112,
                              balance_classes=False,
                              noise_percentage=0.28)
        stream.prepare_for_use()

        evaluator = EvaluatePrequential(show_plot=False,
                                        max_samples=5000,
                                        restart_stream=True,
                                        batch_size=10,
                                        metrics=['kappa', 'kappa_m', 'accuracy'])

        evaluator.evaluate(stream=stream,
                           model=RRSLVQ(prototypes_per_class=4, sigma=10))

        measurements = np.asarray(evaluator.get_measurements()[0])[0]
        # Assert on the retrieved measurements: a check against the builtin
        # ``eval`` could never fail and therefore verified nothing.
        self.assertIsNotNone(measurements)
        self.assertTrue(measurements.get_accuracy() >= 0.5,
                        msg='Accuracy was {} but has to be greater than 0.5'.
                        format(measurements.get_accuracy()))
def demo_parameterized(h, filename="covtype.csv", show_plot=True):
    """Evaluate model ``h`` prequentially on a dataset file.

    Parameters
    ----------
    h
        The model handed to the evaluator.
    filename: string
        File name appended to ``../datasets/`` to locate the data.
    show_plot: bool
        Forwarded to ``EvaluatePrequential``.

    """
    # Stream read from the chosen dataset file.
    data_stream = FileStream("../datasets/" + filename, -1, 1)
    data_stream.prepare_for_use()

    # Evaluator tracking the 'performance' metric.
    prequential = EvaluatePrequential(
        pretrain_size=100,
        output_file='output.csv',
        max_samples=10000,
        batch_size=1,
        n_wait=1000,
        show_plot=show_plot,
        metrics=['performance'],
    )
    prequential.evaluate(stream=data_stream, model=h)
예제 #16
0
def demo(output_file=None, instances=40000):
    """Evaluate a LeverageBagging ensemble of KNN classifiers prequentially.

    The data comes from a ``SEAGenerator`` (classification function 2, no
    noise). The ensemble is wrapped in a single-step ``Pipeline`` before
    being evaluated.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Generated stream.
    sea_stream = SEAGenerator(classification_function=2,
                              instance_seed=755437,
                              noise_percentage=0.0)
    sea_stream.prepare_for_use()

    # Ensemble with a KNN base learner.
    base_learner = KNN(k=8, max_window_size=2000, leaf_size=30)
    ensemble = LeverageBagging(h=base_learner, ensemble_length=1)

    # Single-step pipeline around the ensemble.
    pipeline = Pipeline([('Classifier', ensemble)])

    # Evaluator configured for a classification task.
    evaluator = EvaluatePrequential(pretrain_size=2000,
                                    max_instances=instances,
                                    batch_size=1,
                                    n_wait=200,
                                    max_time=1000,
                                    output_file=output_file,
                                    task_type='classification',
                                    show_plot=True,
                                    plot_options=['kappa', 'kappa_t',
                                                  'performance'])

    evaluator.eval(stream=sea_stream, classifier=pipeline)
def demo(output_file=None, instances=40000):
    """Evaluate a MultiOutputLearner built on SGDClassifier prequentially.

    The data comes from a ``MultilabelGenerator`` producing ``instances``
    samples. The learner is wrapped in a single-step ``Pipeline`` and
    evaluated as a 'multi_output' task.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The number of samples generated; the evaluation itself is capped at
        ``instances - 10000``.

    """
    # Generated multi-label stream.
    multilabel_stream = MultilabelGenerator(n_samples=instances)
    multilabel_stream.prepare_for_use()

    # Multi-output learner wrapped in a single-step pipeline.
    learner = MultiOutputLearner(SGDClassifier(n_iter=100))
    pipeline = Pipeline([('Classifier', learner)])

    # Evaluator configured for a multi-output task.
    evaluator = EvaluatePrequential(pretrain_size=5000,
                                    max_instances=instances - 10000,
                                    batch_size=1,
                                    n_wait=200,
                                    max_time=1000,
                                    output_file=output_file,
                                    task_type='multi_output',
                                    show_plot=True,
                                    plot_options=['hamming_score', 'j_index',
                                                  'exact_match'])

    evaluator.eval(stream=multilabel_stream, classifier=pipeline)
예제 #18
0
def demo():
    """Evaluate a HoeffdingTree prequentially on the sea_stream dataset file."""
    # Model under evaluation.
    tree = HoeffdingTree()

    # Stream read from the dataset file.
    data_stream = FileStream("../datasets/sea_stream.csv", -1, 1)
    data_stream.prepare_for_use()

    # Evaluator tracking the 'performance' metric.
    prequential = EvaluatePrequential(
        pretrain_size=100,
        output_file='output.csv',
        max_samples=10000,
        batch_size=1,
        n_wait=1000,
        show_plot=True,
        metrics=['performance'],
    )
    prequential.evaluate(stream=data_stream, model=tree)
예제 #19
0
def demo(output_file=None, instances=40000):
    """Evaluate an SGDRegressor prequentially on generated regression data.

    The data comes from a ``RegressionGenerator`` with 40000 samples. The
    regressor is wrapped in a single-step ``Pipeline`` and evaluated as a
    'regression' task, plotting 'true_vs_predicts'.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Generated regression stream (sample count is fixed at 40000).
    regression_stream = RegressionGenerator(n_samples=40000)

    # Regressor wrapped in a single-step pipeline.
    regressor = SGDRegressor()
    pipeline = Pipeline([('Classifier', regressor)])

    # Evaluator configured for a regression task.
    evaluator = EvaluatePrequential(
        pretrain_size=1,
        max_instances=instances,
        batch_size=1,
        n_wait=1,
        max_time=1000,
        output_file=output_file,
        task_type='regression',
        show_plot=True,
        plot_options=['true_vs_predicts'],
    )

    evaluator.eval(stream=regression_stream, classifier=pipeline)
예제 #20
0
파일: example.py 프로젝트: fmschleif/bix
def stream_example():
    """Compare three RSLVQ models with different batch sizes on a SEA stream."""
    # Stream with 10% noise.
    sea_stream = SEAGenerator(noise_percentage=0.1)
    sea_stream.prepare_for_use()

    # One RSLVQ per batch size, otherwise identically configured.
    models = [
        RSLVQ(sigma=5.0, batch_size=size, n_epochs=1)
        for size in (1, 5, 10)
    ]
    labels = ['bs=1', 'bs=5', 'bs=10']

    # Evaluator.
    prequential = EvaluatePrequential(max_samples=10000,
                                      batch_size=100,
                                      show_plot=True)

    # Run the comparison.
    prequential.evaluate(stream=sea_stream, model=models, model_names=labels)
예제 #21
0
def demo(output_file=None, instances=40000):
    """Evaluate a MultiOutputLearner built on SGDClassifier prequentially.

    The data comes from a ``MultilabelGenerator`` producing ``instances``
    samples. The learner is wrapped in a single-step ``Pipeline`` and scored
    on 'hamming_score', 'j_index' and 'exact_match'.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The number of samples generated; the evaluation itself is capped at
        ``instances - 10000``.

    """
    # Generated multi-label stream.
    multilabel_stream = MultilabelGenerator(n_samples=instances)

    # Multi-output learner wrapped in a single-step pipeline.
    learner = MultiOutputLearner(SGDClassifier(n_iter=100))
    pipeline = Pipeline([('Classifier', learner)])

    # Evaluator.
    tracked_metrics = ['hamming_score', 'j_index', 'exact_match']
    evaluator = EvaluatePrequential(pretrain_size=5000,
                                    max_samples=instances - 10000,
                                    batch_size=1,
                                    n_wait=200,
                                    max_time=1000,
                                    output_file=output_file,
                                    show_plot=True,
                                    metrics=tracked_metrics)

    evaluator.evaluate(stream=multilabel_stream, model=pipeline)
def demo(output_file=None, instances=40000):
    """Evaluate a RegressionHoeffdingTree prequentially on generated data.

    The data comes from a ``RegressionGenerator`` with 40000 samples. The
    regressor is wrapped in a single-step ``Pipeline`` and scored on
    'mean_square_error'.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Generated regression stream (sample count is fixed at 40000).
    regression_stream = RegressionGenerator(n_samples=40000)

    # Regressor wrapped in a single-step pipeline.
    regressor = RegressionHoeffdingTree()
    pipeline = Pipeline([('Classifier', regressor)])

    # Evaluator.
    evaluator = EvaluatePrequential(
        pretrain_size=1,
        max_samples=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=False,
        metrics=['mean_square_error'],
    )

    evaluator.evaluate(stream=regression_stream, model=pipeline)
예제 #23
0
def demo():
    """Evaluate a HoeffdingTree prequentially on the sea_stream dataset file."""
    # Classifier under evaluation.
    tree = HoeffdingTree()

    # Stream read from the dataset file.
    file_option = FileOption("FILE", "OPT_NAME", "../datasets/sea_stream.csv",
                             "CSV", False)
    data_stream = FileStream(file_option, -1, 1)
    data_stream.prepare_for_use()

    # Number of samples used for pre-training.
    pretrain = 100
    evaluator = EvaluatePrequential(
        pretrain_size=pretrain,
        output_file='output.csv',
        max_instances=10000,
        batch_size=1,
        n_wait=1000,
        task_type='classification',
        show_plot=True,
        plot_options=['performance'],
    )
    evaluator.eval(stream=data_stream, classifier=tree)
def demo(output_file=None, instances=40000):
    """Evaluate a LeverageBagging ensemble of Hoeffding trees prequentially.

    The data comes from a ``WaveformGenerator``. The ensemble is passed to
    ``EvaluatePrequential`` directly, without a ``Pipeline`` wrapper.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Generated stream.
    waveform_stream = WaveformGenerator()
    waveform_stream.prepare_for_use()

    # Ensemble of two members with a HoeffdingTree base learner.
    ensemble = LeverageBagging(h=HoeffdingTree(), ensemble_length=2)

    # Evaluator.
    prequential = EvaluatePrequential(
        pretrain_size=2000,
        max_samples=instances,
        output_file=output_file,
        show_plot=False,
    )

    prequential.evaluate(stream=waveform_stream, model=ensemble)
예제 #25
0
def demo():
    """ Prequential evaluation of a single-step Pipeline

    Wraps a HoeffdingTree in a Pipeline and hands that Pipeline to an
    EvaluatePrequential object in place of a plain learner, using a
    WaveformGenerator stream as the data source and live plotting enabled.

    """
    # Data source
    waveform_stream = WaveformGenerator()
    waveform_stream.prepare_for_use()

    # One-step pipeline whose only stage is the Hoeffding tree
    steps = [('Hoeffding Tree', HoeffdingTree())]
    pipeline = Pipeline(steps)

    # Evaluator: 1000 pre-training samples, at most 100000 instances
    prequential = EvaluatePrequential(show_plot=True,
                                      pretrain_size=1000,
                                      max_instances=100000)

    # Run the evaluation with the pipeline acting as the classifier
    prequential.eval(stream=waveform_stream, classifier=pipeline)
예제 #26
0
파일: grlvq.py 프로젝트: fmschleif/bix
    def test_accuracy_stream(self):
        """Check GRLVQ's prequential accuracy and kappa on a seeded SEA stream.

        The model is evaluated on 20000 samples from SEAGenerator
        (random_state=42) and must reach accuracy >= 0.7 and kappa >= 0.3.
        """
        sea_stream = SEAGenerator(random_state=42)
        sea_stream.prepare_for_use()

        model = GRLVQ(prototypes_per_class=2,
                      regularization=5.0,
                      beta=2,
                      C=None)

        prequential = EvaluatePrequential(pretrain_size=1,
                                          show_plot=False,
                                          max_samples=20000,
                                          batch_size=1)
        prequential.evaluate(sea_stream, model, model_names=['GRLVQ'])

        # Take element [0] of the first item returned by get_measurements().
        scores = np.asarray(prequential.get_measurements()[0])[0]

        accuracy_ok = scores.get_accuracy() >= 0.7
        accuracy_msg = 'Accuracy was {} but has to be greater than 0.7'.format(
            scores.get_accuracy())
        self.assertTrue(accuracy_ok, msg=accuracy_msg)

        kappa_ok = scores.get_kappa() >= 0.3
        kappa_msg = 'Kappa was {} but has to be greater than 0.3'.format(
            scores.get_kappa())
        self.assertTrue(kappa_ok, msg=kappa_msg)
def evaluate_prequential(stream,
                         model,
                         pretrain_size=0.1,
                         window_size=20,
                         plot=False,
                         output=None):
    """ Prequential evaluation with sizes derived from the stream length

    The stream is restarted, a ``pretrain_size`` fraction of its remaining
    samples is set aside for pre-training, and the rest is divided by
    ``window_size`` to obtain the batch size. Both derived values are
    printed before the evaluation starts.

    Parameters
    ----------
    stream
        Stream to evaluate on. It must provide ``restart()`` and
        ``n_remaining_samples()``.

    model
        Learner passed to ``EvaluatePrequential.evaluate``.

    pretrain_size: float
        Fraction of the remaining samples used for pre-training.

    window_size: int
        Number of batches the non-pretraining samples are divided into.
        A value of 0 raises ZeroDivisionError.

    plot: bool
        Forwarded to the evaluator as ``show_plot``.

    output: string
        Forwarded to the evaluator as ``output_file``.

    """
    stream.restart()

    # Read the remaining count once so both sizes are derived from the
    # same value.
    n_remaining = stream.n_remaining_samples()

    # Clamp both sizes: for a short stream, or one that reports a
    # non-positive remaining count, the rounded values would otherwise be
    # a negative pre-train size and a batch size of 0.
    # NOTE(review): unbounded scikit-multiflow streams are expected to
    # report -1 here -- confirm against the stream implementation in use.
    pretrain_samples = max(0, round(n_remaining * pretrain_size))
    batch_size = max(1, round((n_remaining - pretrain_samples) / window_size))

    print("Pretrain size (examples):", pretrain_samples)
    print("Batch size (examples):", batch_size)

    evaluator = EvaluatePrequential(show_plot=plot,
                                    pretrain_size=pretrain_samples,
                                    batch_size=batch_size,
                                    max_samples=1000000,
                                    metrics=[
                                        "exact_match", "hamming_score",
                                        "hamming_loss", "j_index",
                                        "running_time", "model_size"
                                    ],
                                    output_file=output)
    evaluator.evaluate(stream=stream, model=model)
예제 #28
0
    def self_job(self, stream, clf, grid, metrics, max_samples):
        """Evaluate ``clf`` on ``stream`` for every grid combination and save the scores.

        For each element of the Cartesian product of the grid values the
        classifier is reset, the parameters are written into its
        ``__dict__``, and a prequential evaluation is run. The mean value of
        every metric is collected into a DataFrame (rounded to 3 decimals),
        which is written to a CSV file below ``self.path``.

        Parameters
        ----------
        stream
            Stream to evaluate on; its ``name`` (or ``basename`` when
            ``name`` is None) is used in the output file name.
        clf
            Classifier to tune; it is mutated in place.
        grid: dict
            Maps attribute names to iterables of candidate values. Each
            value must expose ``dtype`` (e.g. NumPy scalars).
        metrics: list
            Metric names for the evaluator; must include ``"accuracy"``,
            which is used to pick the best run.
        max_samples
            NOTE(review): currently unused -- the evaluator is configured
            from ``self.max_samples``. Confirm which one is intended.

        Returns
        -------
        list
            ``[stream name, classifier class name, *values of the row with
            the highest accuracy]``.

        Notes
        -----
        Changes the process working directory as a side effect. If the grid
        yields no combination, no evaluator is created and building the
        DataFrame fails.
        """
        # Hoisted: the key order is fixed for the whole search.
        param_names = list(grid.keys())
        results = []
        for param_tuple in itertools.product(*[list(v) for v in grid.values()]):
            try:
                clf.reset()
            except NotImplementedError:
                # Fall back to re-running the constructor with its defaults.
                clf.__init__()
            for name, param in zip(param_names, param_tuple):
                # NOTE(review): only int32 values are converted to a Python
                # int; other integer dtypes are stored unchanged -- confirm
                # this is intentional.
                clf.__dict__[name] = int(param) if param.dtype == 'int32' else param
            stream.prepare_for_use()
            evaluator = EvaluatePrequential(show_plot=False,
                                            max_samples=self.max_samples,
                                            restart_stream=True,
                                            batch_size=10,
                                            metrics=metrics)
            evaluator.evaluate(stream=stream, model=clf)
            buffer = evaluator._data_buffer.data
            # One row per metric holding its "mean" entries; transposed and
            # flattened before being appended to the parameter values.
            means = np.array([list(buffer[n]["mean"]) for n in buffer])
            results.append(list(param_tuple) + means.T.flatten().tolist())

        s_name = stream.basename if stream.name is None else stream.name
        metric_names = np.array([[*evaluator._data_buffer.data]]).flatten().tolist()
        # NOTE(review): column names come from self.grid while the values
        # above come from the ``grid`` argument -- they must have the same keys.
        dfr = pd.DataFrame(results, columns=list(self.grid.keys()) + metric_names)
        dfr = dfr.round(3)

        self.chwd_root()
        os.chdir(os.path.join(os.getcwd(), self.path))

        # NOTE(review): the class name is taken from self.clf, not from the
        # ``clf`` argument.
        clf_name = self.clf.__class__.__name__
        dfr.to_csv(path_or_buf="Result_"+"_"+self.date+"_"+s_name+"_"+clf_name+".csv")
        print("\n ------------------ \n")
        best_row = dfr.values[dfr["accuracy"].values.argmax()]
        print("Best run on "+s_name+" with "+" "+clf_name+" "+str(best_row))
        return [s_name]+[clf_name]+best_row.tolist()
예제 #29
0
    def test_reoccuring(self):
        """Evaluate OzaBaggingAdwin(KNN) on a ReoccuringDriftStream.

        Two MIXEDGenerator streams (classification functions 1 and 0, same
        seed) are combined into a drift stream; the evaluation must return
        a value and reach an accuracy of at least 0.6 over 1000 samples.
        """
        base_stream = MIXEDGenerator(classification_function=1,
                                     random_state=112,
                                     balance_classes=False)
        alternate_stream = MIXEDGenerator(classification_function=0,
                                          random_state=112,
                                          balance_classes=False)
        drift_stream = ReoccuringDriftStream(
            stream=base_stream,
            drift_stream=alternate_stream,
            random_state=None,
            alpha=90.0,  # angle of change, 0 - 90 degrees
            position=2000,
            width=500)
        drift_stream.prepare_for_use()

        tracked_metrics = ['accuracy', 'kappa_t', 'kappa_m', 'kappa']
        prequential = EvaluatePrequential(show_plot=False,
                                          batch_size=10,
                                          max_samples=1000,
                                          metrics=tracked_metrics,
                                          output_file=None)
        learner = OzaBaggingAdwin(base_estimator=KNN())
        outcome = prequential.evaluate(stream=drift_stream, model=learner)

        # Take element [0] of the first item returned by get_measurements().
        scores = np.asarray(prequential.get_measurements()[0])[0]

        self.assertIsNotNone(outcome)

        accuracy_ok = scores.get_accuracy() >= 0.6
        accuracy_msg = 'Accuracy was {} but has to be greater than 0.6'.format(
            scores.get_accuracy())
        self.assertTrue(accuracy_ok, msg=accuracy_msg)
예제 #30
0
from skmultiflow.data.file_stream import FileStream
from skmultiflow.trees.hoeffding_tree import HoeffdingTreeClassifier
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential

# Minimal end-to-end example: prequential evaluation of a
# HoeffdingTreeClassifier on a file-backed stream.

# Create a stream from the relative path "elec.csv"
stream = FileStream("elec.csv")
stream.prepare_for_use()  # Not required for v0.5.0+

# Instantiate the HoeffdingTreeClassifier with its default arguments
ht = HoeffdingTreeClassifier()

# Setup the evaluator: 1000 pre-training samples, at most 10000 samples,
# and 'results.csv' passed as the output file
evaluator = EvaluatePrequential(pretrain_size=1000,
                                max_samples=10000,
                                output_file='results.csv')

# Run evaluation
evaluator.evaluate(stream=stream, model=ht)