コード例 #1
0
def test_resampletomean():
    """Run a leave-one-out experiment over five "ResampleToMean" DINT models.

    A SchemaMatcher client is opened against localhost:8080 and every model
    and dataset it reports is removed. Five models are then built through
    ``create_dint_model`` -- one per configuration name, each passed
    "ResampleToMean" (presumably the resampling strategy; confirm against
    ``create_dint_model``) -- and handed to a single "leave_one_out"
    Experiment that writes its result and debug CSV files under ``results``.
    """
    matcher = SchemaMatcher(host="localhost", port=8080)

    # Start from an empty server: models are removed first, then datasets.
    logging.info("Cleaning models from DINT server")
    for stale_model in matcher.models:
        matcher.remove_model(stale_model)
    logging.info("Cleaning datasets from DINT server")
    for stale_dataset in matcher.datasets:
        matcher.remove_dataset(stale_dataset)

    # The models are created in exactly this order.
    config_names = (
        "full",
        "single",
        "full_chardist",
        "noheader",
        "chardistonly",
    )
    models = [
        create_dint_model(matcher, config_name, "ResampleToMean")
        for config_name in config_names
    ]

    performance_path = os.path.join(
        'results', "performance_dint_resampletomean.csv")
    debug_path = os.path.join("results", "debug_dint_resampletomean.csv")

    experiment = Experiment(
        models,
        experiment_type="leave_one_out",
        description="plain loo",
        result_csv=performance_path,
        debug_csv=debug_path)
    experiment.run()
コード例 #2
0
def test_models_holdout():
    """Run a repeated-holdout experiment over DINT models and one NNetModel.

    A SchemaMatcher client is opened against localhost:8080 and every model
    and dataset it reports is removed. Five models are built through
    ``create_dint_model`` with "NoResampling", followed by one NNetModel
    configured with the 'rf@charfreq' classifier and no headers. All six are
    evaluated together in a "repeated_holdout" Experiment (holdout=0.5,
    num=10) that writes its result and debug CSV files under ``results``.
    """
    matcher = SchemaMatcher(host="localhost", port=8080)

    # Start from an empty server: models are removed first, then datasets.
    logging.info("Cleaning models from DINT server")
    for stale_model in matcher.models:
        matcher.remove_model(stale_model)
    logging.info("Cleaning datasets from DINT server")
    for stale_dataset in matcher.datasets:
        matcher.remove_dataset(stale_dataset)

    # The DINT models are created in exactly this order.
    config_names = (
        "full",
        "single",
        "chardist",
        "noheader",
        "chardistonly",
    )
    models = [
        create_dint_model(matcher, config_name, "NoResampling")
        for config_name in config_names
    ]

    # The NNetModel is constructed after the DINT models and evaluated last.
    nnet_debug_path = os.path.join("results", "debug_nnet_rf_holdout.csv")
    models.append(
        NNetModel(['rf@charfreq'],
                  'rf@charfreq model: no headers',
                  add_headers=False,
                  p_header=0,
                  debug_csv=nnet_debug_path))

    performance_path = os.path.join(
        'results', "performance_models_holdout.csv")
    debug_path = os.path.join("results", "debug_holdout.csv")

    experiment = Experiment(
        models,
        experiment_type="repeated_holdout",
        description="repeated_holdout_0.5_10",
        result_csv=performance_path,
        debug_csv=debug_path,
        holdout=0.5,
        num=10)
    experiment.run()
コード例 #3
0
        datasets[1].column('Bureau of Meteorology station number'):
        'station-number',
    },
    resampling_strategy=resampling_strategy)
# Print the summary of the model produced by the call above.
print(model.summary)

# Print the current model list (dm.models); the new model is expected in it.
print()
print("Now we should see the new model in the list")
print(dm.models)

#
# Remove a model: it is removed by passing its id to dm.remove_model, and
# the model list is printed again so the effect can be checked.
#
print()
print("We can also remove models")
dm.remove_model(model.id)
print(dm.models)

#==============
#
# Let's evaluate a model...
#
#==============

#
# User labelled data
#
training_data = {
    datasets[0].column('Quality'): 'data-quality',
    datasets[0].column('Year'): 'year',
    datasets[0].column('Month'): 'month',