예제 #1
0
def load_data(dataset):
    etl = ETL(DATA_PATH, [128, 256, 512, 1024],
              sma_window=3,
              minimal_movement=0.75)
    etl.load(dataset)
    etl.preprocess_pooled()
    etl.generate_fourier_dataset(window_overlap=1)
예제 #2
0
def generate_fourier(data_path, window_sizes, size, params):
    etl = ETL(
        data_path=data_path,
        window_sizes=window_sizes,
        sma_window=params["sma"],
        minimal_movement=params["minimal_movement"],
        size=size
    )
    etl.load("CIMA")
    print("\nPreprocessing data.")
    etl.preprocess_pooled()
    print("\nGenerating fourier data.")
    etl.generate_fourier_dataset(window_overlap=params["window_overlap"])
예제 #3
0
def cv(model_name):
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    angles = [
        "right_shoulder", "left_shoulder", "right_elbow", "left_elbow",
        "right_hip", "left_hip", "right_knee", "left_knee"
    ]
    window_sizes = [128, 256, 512, 1024]

    etl = ETL(DATA_PATH, [128, 256, 512, 1024],
              sma_window=3,
              minimal_movement=0.75)

    etl.load("CIMA")

    infants = np.array(list(etl.cima.keys()))
    labels = np.array([etl.cima[infant]["label"] for infant in infants])

    etl.preprocess_pooled()
    etl.generate_fourier_dataset(window_overlap=1)

    X = pd.DataFrame()
    for train_index, test_index in kf.split(infants, labels):

        ids = infants[train_index]

        id_hash = f"{model_name}_{sha1(ids).hexdigest()[:5]}"
        model_path = f"saved_models/{id_hash}.joblib"

        if os.path.exists(model_path):
            models = joblib.load(model_path)
        else:
            models = {}
            for window_size in window_sizes:
                for angle in angles:
                    fourier_path = os.path.join(DATA_PATH, str(window_size),
                                                angle + ".json")
                    df = pd.read_json(fourier_path)
                    X = X.append(df)
                X = X[X.id.isin(ids)]
                y = X["label"]
                X = pd.DataFrame(X.data.tolist())

                # model_name = f"{window_size}_{model_name}"
                models[window_size] = train_model(model_name, X, y, save=False)
            joblib.dump(models, model_path)

        x_test = infants[test_index]
        y_test = labels[test_index]

        score = evaluate_model(id_hash, models, x_test, y_test)