Code Example #1
def test_evaluate_holdout_classifier(tmpdir, test_path):
    # Setup file stream
    stream = RandomTreeGenerator(tree_random_state=23, sample_random_state=12, n_classes=4, n_cat_features=2,
                                 n_num_features=5, n_categories_per_cat_feature=5, max_tree_depth=6, min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    stream.prepare_for_use()

    # Setup learner
    nominal_attr_idx = [x for x in range(15, len(stream.feature_names))]
    learner = HoeffdingTree(nominal_attributes=nominal_attr_idx)

    # Setup evaluator
    n_wait = 200
    max_samples = 1000
    metrics = ['kappa', 'kappa_t', 'performance']
    output_file = os.path.join(str(tmpdir), "holdout_summary.csv")
    evaluator = EvaluateHoldout(n_wait=n_wait,
                                max_samples=max_samples,
                                test_size=50,
                                metrics=metrics,
                                output_file=output_file)

    # Evaluate
    result = evaluator.evaluate(stream=stream, model=learner)
    result_learner = result[0]

    assert isinstance(result_learner, HoeffdingTree)

    assert learner.get_model_measurements == result_learner.get_model_measurements

    expected_file = os.path.join(test_path, 'holdout_summary.csv')
    compare_files(output_file, expected_file)
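
The test above targets the pre-0.5 scikit-multiflow API: the learner is still called HoeffdingTree, the stream needs prepare_for_use(), and accuracy is requested under the metric name 'performance'. The snippet is not self-contained; a minimal sketch of the header it assumes follows (tmpdir and test_path are pytest fixtures and compare_files is a local test helper, so they are not imported here).

# Sketch of the imports the test above assumes (pre-0.5 scikit-multiflow API).
import os

from skmultiflow.data import RandomTreeGenerator
from skmultiflow.trees import HoeffdingTree
from skmultiflow.evaluation import EvaluateHoldout
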
Code Example #2
def test_evaluate_holdout_classifier(tmpdir, test_path):
    # Setup file stream
    stream = RandomTreeGenerator(tree_random_state=23, sample_random_state=12, n_classes=4, n_cat_features=2,
                                 n_num_features=5, n_categories_per_cat_feature=5, max_tree_depth=6, min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)

    # Setup learner
    nominal_attr_idx = [x for x in range(15, len(stream.feature_names))]
    learner = HoeffdingTreeClassifier(nominal_attributes=nominal_attr_idx)

    # Setup evaluator
    n_wait = 200
    max_samples = 1000
    metrics = ['accuracy', 'kappa', 'kappa_t']
    output_file = os.path.join(str(tmpdir), "holdout_summary.csv")
    evaluator = EvaluateHoldout(n_wait=n_wait,
                                max_samples=max_samples,
                                test_size=50,
                                metrics=metrics,
                                output_file=output_file)

    # Evaluate
    result = evaluator.evaluate(stream=stream, model=learner)
    result_learner = result[0]

    assert isinstance(result_learner, HoeffdingTreeClassifier)

    assert learner.get_model_measurements == result_learner.get_model_measurements

    expected_file = os.path.join(test_path, 'holdout_summary.csv')
    compare_files(output_file, expected_file)

    mean_performance, current_performance = evaluator.get_measurements(model_idx=0)

    expected_mean_accuracy = 0.344000
    assert np.isclose(mean_performance.accuracy_score(), expected_mean_accuracy)

    expected_mean_kappa = 0.135021
    assert np.isclose(mean_performance.kappa_score(), expected_mean_kappa)

    expected_mean_kappa_t = 0.180000
    assert np.isclose(mean_performance.kappa_t_score(), expected_mean_kappa_t)

    expected_current_accuracy = 0.360000
    assert np.isclose(current_performance.accuracy_score(), expected_current_accuracy)

    expected_current_kappa = 0.152542
    assert np.isclose(current_performance.kappa_score(), expected_current_kappa)

    expected_current_kappa_t = 0.200000
    assert np.isclose(current_performance.kappa_t_score(), expected_current_kappa_t)

    expected_info = "EvaluateHoldout(batch_size=1, dynamic_test_set=False, max_samples=1000,\n" \
                    "                max_time=inf, metrics=['accuracy', 'kappa', 'kappa_t'],\n" \
                    "                n_wait=200,\n" \
                    "                output_file='holdout_summary.csv',\n" \
                    "                restart_stream=True, show_plot=False, test_size=50)"
    assert evaluator.get_info() == expected_info
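
Compared with Code Example #1, this version targets scikit-multiflow 0.5+: the learner is renamed HoeffdingTreeClassifier, prepare_for_use() is no longer required, the old 'performance' metric is now called 'accuracy', and results are read through accuracy_score(), kappa_score() and kappa_t_score(). For orientation, here is a minimal, self-contained usage sketch along the same lines; the parameter values are illustrative and not taken from the test.

from skmultiflow.data import RandomTreeGenerator
from skmultiflow.trees import HoeffdingTreeClassifier
from skmultiflow.evaluation import EvaluateHoldout

# Synthetic 4-class stream and an untrained Hoeffding tree.
stream = RandomTreeGenerator(tree_random_state=23, sample_random_state=12, n_classes=4)
learner = HoeffdingTreeClassifier()

# Holdout evaluation: re-test on a 50-sample holdout set every 200 samples,
# processing at most 1,000 samples overall.
evaluator = EvaluateHoldout(n_wait=200, max_samples=1000, test_size=50,
                            metrics=['accuracy', 'kappa'])
evaluator.evaluate(stream=stream, model=learner)

# Mean (whole run) and current (last test window) measurements for model 0.
mean_perf, current_perf = evaluator.get_measurements(model_idx=0)
print(mean_perf.accuracy_score(), current_perf.kappa_score())
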
Code Example #3
def demo(output_file=None, instances=40000):
    """ _test_comparison_holdout
    
    This demo tests a holdout evaluation task in which more than one learner is
    evaluated, which makes it a comparison task.
    
    Parameters
    ----------
    output_file: string, optional
        If passed, this parameter indicates the output file name. If left blank,
        no output file will be generated.
    
    instances: int (Default: 40000)
        The evaluation's maximum number of instances.
    
    """
    # Setup the File Stream
    # stream = FileStream("../data/datasets/covtype.csv", -1, 1)
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the classifier
    clf_one = HoeffdingTree()
    # clf_two = KNNAdwin(n_neighbors=8, max_window_size=2000)
    # classifier = PassiveAggressiveClassifier()
    # classifier = SGDRegressor()
    # classifier = PerceptronMask()

    # Setup the list of classifiers to evaluate
    classifier = [clf_one]

    # Setup the evaluator
    evaluator = EvaluateHoldout(test_size=500,
                                dynamic_test_set=True,
                                max_samples=instances,
                                batch_size=1,
                                n_wait=5000,
                                max_time=1000,
                                output_file=output_file,
                                show_plot=True,
                                metrics=['kappa'])

    # Evaluate
    evaluator.evaluate(stream=stream, model=classifier)
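
The docstring above describes a comparison between several learners, but only clf_one is active; clf_two is commented out. Below is a hedged sketch of the two-learner variant, assuming the pre-0.5 import paths and the KNNAdwin class mentioned in the commented line (the model_names labels are illustrative).

from skmultiflow.data import WaveformGenerator
from skmultiflow.trees import HoeffdingTree
from skmultiflow.lazy import KNNAdwin                 # pre-0.5 class; assumed import path
from skmultiflow.evaluation import EvaluateHoldout

stream = WaveformGenerator()
stream.prepare_for_use()                              # required in pre-0.5 releases

clf_one = HoeffdingTree()
clf_two = KNNAdwin(n_neighbors=8, max_window_size=2000)

# Passing a list of models turns the run into a comparison task.
evaluator = EvaluateHoldout(test_size=500, n_wait=5000,
                            max_samples=20000, metrics=['kappa'])
evaluator.evaluate(stream=stream, model=[clf_one, clf_two],
                   model_names=['HT', 'KNN-ADWIN'])
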
Code Example #4
def demo(output_file=None, instances=40000):
    """ _test_holdout
    
    This demo runs a holdout evaluation task with one learner. The default
    stream is a WaveformGenerator. The default learner is an SGDClassifier,
    which is inserted into a Pipeline structure. All the defaults can be
    changed by commenting/uncommenting the code below.
    
    Parameters
    ----------
    output_file: string
        The name of the csv output file
    
    instances: int
        The evaluation's max number of instances
         
    """
    # Setup the File Stream
    # stream = FileStream("../data/datasets/covtype.csv", -1, 1)
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the classifier
    classifier = SGDClassifier()
    # classifier = PassiveAggressiveClassifier()
    # classifier = SGDRegressor()
    # classifier = PerceptronMask()

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    evaluator = EvaluateHoldout(test_size=2000,
                                dynamic_test_set=True,
                                max_samples=instances,
                                batch_size=1,
                                n_wait=15000,
                                max_time=1000,
                                output_file=output_file,
                                show_plot=True,
                                metrics=['kappa', 'kappa_t', 'performance'])

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
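
As with the previous demo, the imports come from the surrounding file. A minimal sketch of the header this demo assumes is shown below, together with a call using a reduced sample budget (the Pipeline import path is an assumption; pre-0.5 API).

from sklearn.linear_model import SGDClassifier

from skmultiflow.data import WaveformGenerator
from skmultiflow.core import Pipeline                 # assumed import path for Pipeline
from skmultiflow.evaluation import EvaluateHoldout

# Run the demo without writing a summary file and with a smaller sample budget.
demo(output_file=None, instances=10000)
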
Code Example #5
def test_evaluate_holdout_classifier(tmpdir, test_path):
    # Setup file stream
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=4,
                                 n_cat_features=2,
                                 n_num_features=5,
                                 n_categories_per_cat_feature=5,
                                 max_tree_depth=6,
                                 min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    stream.prepare_for_use()

    # Setup learner
    nominal_attr_idx = [x for x in range(15, len(stream.feature_names))]
    learner = HoeffdingTree(nominal_attributes=nominal_attr_idx)

    # Setup evaluator
    n_wait = 200
    max_samples = 1000
    metrics = ['accuracy', 'kappa', 'kappa_t']
    output_file = os.path.join(str(tmpdir), "holdout_summary.csv")
    evaluator = EvaluateHoldout(n_wait=n_wait,
                                max_samples=max_samples,
                                test_size=50,
                                metrics=metrics,
                                output_file=output_file)

    # Evaluate
    result = evaluator.evaluate(stream=stream, model=learner)
    result_learner = result[0]

    assert isinstance(result_learner, HoeffdingTree)

    assert learner.get_model_measurements == result_learner.get_model_measurements

    expected_file = os.path.join(test_path, 'holdout_summary.csv')
    compare_files(output_file, expected_file)

    mean_performance, current_performance = evaluator.get_measurements(
        model_idx=0)
    expected_mean_accuracy = 0.344000
    expected_mean_kappa = 0.135021
    expected_mean_kappa_t = 0.180000
    expected_current_accuracy = 0.360000
    expected_current_kappa = 0.152542
    expected_current_kappa_t = 0.200000
    assert np.isclose(mean_performance.get_accuracy(), expected_mean_accuracy)
    assert np.isclose(mean_performance.get_kappa(), expected_mean_kappa)
    assert np.isclose(mean_performance.get_kappa_t(), expected_mean_kappa_t)
    assert np.isclose(current_performance.get_accuracy(),
                      expected_current_accuracy)
    assert np.isclose(current_performance.get_kappa(), expected_current_kappa)
    assert np.isclose(current_performance.get_kappa_t(),
                      expected_current_kappa_t)

    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=2,
                                 n_cat_features=2,
                                 n_num_features=5,
                                 n_categories_per_cat_feature=5,
                                 max_tree_depth=6,
                                 min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    stream.prepare_for_use()

    # Setup learner
    nominal_attr_idx = [x for x in range(15, len(stream.feature_names))]
    learner = HoeffdingTree(nominal_attributes=nominal_attr_idx)

    # Setup evaluator
    n_wait = 200
    max_samples = 1000
    metrics = ['f1', 'precision', 'recall', 'gmean']
    evaluator = EvaluateHoldout(n_wait=n_wait,
                                max_samples=max_samples,
                                test_size=50,
                                metrics=metrics)

    # Evaluate
    evaluator.evaluate(stream=stream, model=learner)
    mean_performance, current_performance = evaluator.get_measurements(
        model_idx=0)

    expected_current_f1_score = 0.6818181818181818
    expected_current_precision = 0.625
    expected_current_recall = 0.75
    expected_current_g_mean = 0.7245688373094719
    expected_mean_f1_score = 0.6431718061674009
    expected_mean_precision = 0.5748031496062992
    expected_mean_recall = 0.73
    expected_mean_g_mean = 0.6835202996254025

    assert np.isclose(current_performance.get_f1_score(),
                      expected_current_f1_score)
    assert np.isclose(current_performance.get_precision(),
                      expected_current_precision)
    assert np.isclose(current_performance.get_recall(),
                      expected_current_recall)
    assert np.isclose(current_performance.get_g_mean(),
                      expected_current_g_mean)
    assert np.isclose(mean_performance.get_f1_score(), expected_mean_f1_score)
    assert np.isclose(mean_performance.get_precision(),
                      expected_mean_precision)
    assert np.isclose(mean_performance.get_recall(), expected_mean_recall)
    assert np.isclose(mean_performance.get_g_mean(), expected_mean_g_mean)
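
The second half of this test switches to a two-class stream because f1, precision, recall and gmean are binary-classification metrics in this API. Outside of a test, the same measurement objects can simply be reported instead of asserted against hard-coded values; a short sketch, continuing from the evaluator above with the same pre-0.5 accessors:

# Sketch: report the holdout measurements instead of asserting on them.
mean_performance, current_performance = evaluator.get_measurements(model_idx=0)
print('mean F1:        ', mean_performance.get_f1_score())
print('mean precision: ', mean_performance.get_precision())
print('mean recall:    ', mean_performance.get_recall())
print('current G-mean: ', current_performance.get_g_mean())
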
Code Example #6
# Prequential evaluation
eval1 = EvaluatePrequential(pretrain_size=400,
                            max_samples=300000,
                            batch_size=1,
                            n_wait=100,
                            max_time=2000,
                            show_plot=False,
                            metrics=['accuracy'])

eval1.evaluate(stream=ds, model=model_hat)

# Holdout evaluation
eval2 = EvaluateHoldout(max_samples=30000,
                        max_time=2000,
                        show_plot=False,
                        metrics=['accuracy'],
                        dynamic_test_set=True)

eval2.evaluate(stream=ds, model=model_hat)
###################################################

# Extremely Fast Decision Tree
from skmultiflow.trees import ExtremelyFastDecisionTreeClassifier
from skmultiflow.data import ConceptDriftStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.evaluation import EvaluateHoldout

# Simulate a sample data stream
ds = ConceptDriftStream(random_state=777, position=30000)
ds
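
In this fragment the notebook cells appear out of order (the imports come after the evaluation calls) and model_hat is never defined. A sketch of the missing setup follows, assuming from the variable name that model_hat is a Hoeffding Adaptive Tree classifier; that choice is a guess, not something the fragment states.

from skmultiflow.data import ConceptDriftStream
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier   # assumed model behind 'model_hat'
from skmultiflow.evaluation import EvaluatePrequential, EvaluateHoldout

# Stream with a concept drift centered at sample 30,000.
ds = ConceptDriftStream(random_state=777, position=30000)
model_hat = HoeffdingAdaptiveTreeClassifier()
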
Code Example #7
    print("Chosen regressor:", "Adaptive Random Forest")
else:
    print("Invalid Model Specified. Expected: KNN, HAT or ARF")
    parser.print_usage()
    exit()

evaluator = None
mode = None
if not args.all_ap:
    #evaluator = EvaluatePrequential(output_file=model_name+"_eval_one_label.txt",show_plot=args.show_plot, pretrain_size=200, max_samples=max_samples, metrics=['true_vs_predicted','mean_square_error','mean_absolute_error'])
    if args.holdout:
        evaluator = EvaluateHoldout(
            output_file=model_name + "_eval_one_label_v2_holdout.txt",
            show_plot=args.show_plot,
            n_wait=60,
            test_size=60,
            batch_size=60,
            max_samples=max_samples,
            metrics=[
                'true_vs_predicted', 'mean_square_error', 'mean_absolute_error'
            ])
    else:
        evaluator = EvaluatePrequential(
            output_file=model_name + "_eval_one_label_v2.txt",
            show_plot=args.show_plot,
            n_wait=60,
            pretrain_size=60,
            batch_size=60,
            max_samples=max_samples,
            metrics=[
                'true_vs_predicted', 'mean_square_error',
                'mean_absolute_error', 'running_time', 'model_size'
Code Example #8
def custom_evaluation(datastreams, clfs, stream_length, Prequential=False):

    eval_results = []
    eval_time = 0
    eval_acc = 0
    eval_kappa = 0
    eval_kappam = 0
    eval_kappat = 0
    ev = ['Holdout', 'Prequential']
    mod = clfs[0]
    resultpath = ""
    rdf = []

    stream = datastreams[0]
    stream.prepare_for_use()
    #print(stream.get_data_info())
    #print(datastream_names[index])

    if Prequential:
        resultpath = "results/Prequential/" + ev[1] + "_" + datastreams[
            1] + "_" + clfs[1] + ".csv"
        evaluator = EvaluatePrequential(max_samples=stream_length,
                                        metrics=[
                                            'accuracy', 'kappa', 'kappa_t',
                                            'kappa_m', 'running_time'
                                        ])
        eval_text = ev[1]
    else:
        resultpath = "results/Holdout/" + ev[0] + "_" + datastreams[
            1] + "_" + clfs[1] + ".csv"
        evaluator = EvaluateHoldout(max_samples=stream_length,
                                    metrics=[
                                        'accuracy', 'kappa', 'kappa_t',
                                        'kappa_m', 'running_time'
                                    ])
        eval_text = ev[0]

    print('')
    print(eval_text + ' evaluation for ' + datastreams[1] + ' stream:')
    try:
        evaluator.evaluate(stream=stream, model=mod)

        eval_results.append(evaluator.get_mean_measurements())
        eval_time = evaluator.running_time_measurements[0]._total_time

        for i, item in enumerate(eval_results, start=0):
            eval_acc = item[0].get_accuracy()
            eval_kappa = item[0].get_kappa()
            eval_kappam = item[0].get_kappa_m()
            eval_kappat = item[0].get_kappa_t()
    except Exception as e:
        print(e)

    print('')
    print(eval_text + ' evaluation for ' + datastreams[1] + ' stream finished')

    #try:
    #    evaluator.evaluate(stream=stream, model=mod)

    #    eval_results_prequel.append(evaluator.get_mean_measurements())
    #except Exception as e:
    #    print(e)

    #print('')
    #print('Prequential evaluation for '+datastreams[1]+' stream finished')

    print('')
    print('Results for the ' + eval_text + ' eval:')
    print('')
    print(clfs[1] + ' :')
    print('Accuracy: ' + str(round(eval_acc, 4)))
    print('Kappa: ' + str(round(eval_kappa, 4)))
    print('Kappa_m: ' + str(round(eval_kappam, 4)))
    print('Kappa_t: ' + str(round(eval_kappat, 4)))
    print('Total comp. time: ' + str(round(eval_time, 2)))

    try:
        # create a dataframe with the results for the data stream and the active classifier, and save it as csv
        rdf_data = [[
            datastreams[1], clfs[1],
            str(round(eval_acc, 4)),
            str(round(eval_kappa, 4)),
            str(round(eval_kappam, 4)),
            str(round(eval_kappat, 4)),
            str(round(eval_time, 2))
        ]]
        rdf = pd.DataFrame(rdf_data,
                           columns=[
                               'Stream', 'Clf', 'Accuracy', 'Kappa', 'Kappa_m',
                               'Kappa_t', 'total comp. time'
                           ])
        rdf.to_csv(resultpath, index=None, header=True)
    except Exception as e:
        print(e)

    #print('Total comp. time: '+ str(round(item[j].get_kappa_t(), 4)))

    print('')
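
custom_evaluation expects datastreams and clfs as (object, name) pairs, as the [0]/[1] indexing shows, and it relies on pandas plus the pre-0.5 scikit-multiflow API. A hedged call sketch with placeholder stream and classifier choices follows; the results/Holdout directory must already exist for the CSV to be written.

import pandas as pd                                   # used by custom_evaluation's result dataframe

from skmultiflow.data import SEAGenerator
from skmultiflow.trees import HoeffdingTree

# Holdout run (Prequential=False) over 20,000 samples; the summary CSV is
# written to results/Holdout/ (the directory must already exist).
custom_evaluation(datastreams=(SEAGenerator(random_state=1), 'SEA'),
                  clfs=(HoeffdingTree(), 'HT'),
                  stream_length=20000,
                  Prequential=False)
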