Beispiel #1
0
def test_run_clustering_experiment():
    """Test running and saving results for clustering.

    Runs a two-cluster ``TimeSeriesKMeans`` experiment on the "UnitTest"
    train/test splits and checks that the train and test prediction files
    for resample 0 have been created.

    Results are written to a throwaway temporary directory instead of a
    path relative to the current working directory, so the test does not
    depend on where it is launched from and everything it writes (files
    and directories) is removed even when an assertion fails.
    """
    import tempfile  # local import so the test does not rely on file-level imports

    dataset = "UnitTest"
    train_X, train_Y = load_unit_test("TRAIN", return_X_y=True)
    test_X, test_Y = load_unit_test("TEST", return_X_y=True)
    with tempfile.TemporaryDirectory() as tmp_dir:
        run_clustering_experiment(
            train_X,
            TimeSeriesKMeans(n_clusters=2),
            # Trailing separator mirrors the original "../Temp/" argument.
            results_path=os.path.join(tmp_dir, ""),
            trainY=train_Y,
            testX=test_X,
            testY=test_Y,
            cls_name="kmeans",
            dataset_name=dataset,
            resample_id=0,
        )
        predictions_dir = os.path.join(tmp_dir, "kmeans", "Predictions", dataset)
        test_path = os.path.join(predictions_dir, "testResample0.csv")
        train_path = os.path.join(predictions_dir, "trainResample0.csv")
        assert os.path.isfile(test_path)
        assert os.path.isfile(train_path)
    # Leaving the ``with`` block deletes the temporary directory and its
    # contents, so no explicit os.remove calls are needed.
Beispiel #2
0
def test_run_clustering_experiment(tmp_path):
    """Check that a clustering experiment writes its prediction files.

    A two-cluster ``TimeSeriesKMeans`` experiment is run on the "UnitTest"
    data with ``tmp_path`` as the results directory. The test asserts that
    the train and test prediction files for resample 0 exist and then
    removes them.
    """
    dataset = "UnitTest"
    x_train, y_train = load_unit_test("TRAIN")
    x_test, y_test = load_unit_test("TEST")
    clusterer = TimeSeriesKMeans(n_clusters=2)

    run_clustering_experiment(
        x_train,
        clusterer,
        results_path=tmp_path,
        trainY=y_train,
        testX=x_test,
        testY=y_test,
        cls_name="kmeans",
        dataset_name=dataset,
        resample_id=0,
    )

    # Test file first, then train file, matching the order of the checks.
    expected_files = (
        tmp_path / f"kmeans/Predictions/{dataset}/testResample0.csv",
        tmp_path / f"kmeans/Predictions/{dataset}/trainResample0.csv",
    )
    # Verify every file before deleting any of them.
    for results_file in expected_files:
        assert results_file.is_file()
    # remove files
    for results_file in expected_files:
        results_file.unlink()
Beispiel #3
0
        data_dir = "../datasets/data/"
        results_dir = "C:/Temp/Clusterers/"
        dataset = "UnitTest"
        clusterer = "kmeans"
        resample = 0
        tf = True
        clst = TimeSeriesKMeans(n_clusters=2)
        load_and_run_clustering_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name=clusterer,
            dataset=dataset,
            resample_id=resample,
            train_file=tf,
            clusterer=clst,
        )
        train_X, train_Y = load_ts(data_dir + dataset + "/" + dataset +
                                   "_TRAIN.ts")
        test_X, test_Y = load_ts(data_dir + dataset + "/" + dataset +
                                 "_TEST.ts")
        run_clustering_experiment(
            train_X,
            clst,
            results_path=results_dir + "Temp/",
            trainY=train_Y,
            testX=test_X,
            testY=test_Y,
            cls_name=clusterer,
        )
        window = tune_window(distance, train_X)
        name = clusterer + "-" + distance + "-tuned"
    else:
        name = clusterer + "-" + distance
    if (distance == "wdtw" or distance == "dwdtw" or distance == "dtw"
            or distance == "wdtw"):
        parameters = {"window": 0.2, "epsilon": 0.05, "g": 0.05, "c": 1}
    else:
        parameters = {"window": 1.0, "epsilon": 0.05, "g": 0.05, "c": 1}
    clst = TimeSeriesKMeans(
        averaging_method="dba",
        average_params={"averaging_distance_metric": distance},
        metric=distance,
        distance_params=parameters,
        n_clusters=len(set(train_Y)),
        random_state=resample + 1,
    )
    run_clustering_experiment(
        train_X,
        clst,
        results_path=results_dir,
        trainY=train_Y,
        testX=test_X,
        testY=test_Y,
        cls_name=name,
        dataset_name=dataset,
        resample_id=resample,
        overwrite=True,
    )
    print("done")