def load_data(dataset):
    """Run the ETL pipeline end to end for ``dataset``.

    An ``ETL`` instance rooted at ``DATA_PATH`` is configured with window
    sizes 128/256/512/1024, ``sma_window=3`` and ``minimal_movement=0.75``.
    It then loads ``dataset``, runs pooled preprocessing and generates the
    Fourier dataset with ``window_overlap=1``.

    Args:
        dataset: Dataset identifier forwarded unchanged to ``ETL.load``.

    Returns:
        None. The function is called only for the effects of the ETL calls.
    """
    window_sizes = [128, 256, 512, 1024]
    pipeline = ETL(
        DATA_PATH,
        window_sizes,
        sma_window=3,
        minimal_movement=0.75,
    )

    pipeline.load(dataset)
    pipeline.preprocess_pooled()
    pipeline.generate_fourier_dataset(window_overlap=1)
def generate_fourier(data_path, window_sizes, size, params):
    """Load CIMA, preprocess it and generate the Fourier dataset.

    Args:
        data_path: Forwarded to ``ETL`` as ``data_path``.
        window_sizes: Forwarded to ``ETL`` as ``window_sizes``.
        size: Forwarded to ``ETL`` as ``size``.
        params: Mapping that must provide the keys ``"sma"``,
            ``"minimal_movement"`` and ``"window_overlap"``.

    Returns:
        None. Progress messages are printed before each pipeline stage.
    """
    etl_options = {
        "data_path": data_path,
        "window_sizes": window_sizes,
        "sma_window": params["sma"],
        "minimal_movement": params["minimal_movement"],
        "size": size,
    }
    pipeline = ETL(**etl_options)
    pipeline.load("CIMA")

    print("\nPreprocessing data.")
    pipeline.preprocess_pooled()

    print("\nGenerating fourier data.")
    # "window_overlap" is looked up only now, after preprocessing has run.
    pipeline.generate_fourier_dataset(window_overlap=params["window_overlap"])
def cv(model_name):
    """Run 5-fold stratified cross-validation over the CIMA infants.

    For each fold, one model per window size is obtained for the training
    infants and handed to ``evaluate_model`` together with the held-out
    infants and their labels. Model sets are cached under
    ``saved_models/`` in a file keyed by ``model_name`` and a short SHA-1
    digest of the training ids, so an identical split is not retrained.

    Training data for a window size is read from
    ``DATA_PATH/<window_size>/<angle>.json`` for each of the eight angles;
    the ``id``, ``label`` and ``data`` columns of those files are used.

    Args:
        model_name: Identifier forwarded to ``train_model``; also the
            prefix of the cache file name.

    Returns:
        list: The values returned by ``evaluate_model``, one per fold, in
        fold order.
    """
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    angles = [
        "right_shoulder",
        "left_shoulder",
        "right_elbow",
        "left_elbow",
        "right_hip",
        "left_hip",
        "right_knee",
        "left_knee",
    ]
    window_sizes = [128, 256, 512, 1024]

    # ETL receives its own copy so it cannot alter the list iterated below.
    etl = ETL(DATA_PATH, list(window_sizes), sma_window=3, minimal_movement=0.75)
    etl.load("CIMA")
    infants = np.array(list(etl.cima.keys()))
    labels = np.array([etl.cima[infant]["label"] for infant in infants])
    etl.preprocess_pooled()
    etl.generate_fourier_dataset(window_overlap=1)

    scores = []
    for train_index, test_index in kf.split(infants, labels):
        ids = infants[train_index]
        id_hash = f"{model_name}_{sha1(ids).hexdigest()[:5]}"
        model_path = f"saved_models/{id_hash}.joblib"

        if os.path.exists(model_path):
            models = joblib.load(model_path)
        else:
            models = {}
            for window_size in window_sizes:
                # Build a fresh frame per window size. Reusing a single
                # accumulator across iterations would append raw frames onto
                # the previous window's feature matrix, which no longer has
                # the id/label columns.
                frames = [
                    pd.read_json(
                        os.path.join(DATA_PATH, str(window_size), angle + ".json")
                    )
                    for angle in angles
                ]
                # pd.concat replaces DataFrame.append, removed in pandas 2.0.
                windows = pd.concat(frames)
                windows = windows[windows["id"].isin(ids)]

                y = windows["label"]
                X = pd.DataFrame(windows["data"].tolist())
                models[window_size] = train_model(model_name, X, y, save=False)

            # joblib.dump does not create missing parent directories.
            os.makedirs(os.path.dirname(model_path), exist_ok=True)
            joblib.dump(models, model_path)

        x_test = infants[test_index]
        y_test = labels[test_index]
        scores.append(evaluate_model(id_hash, models, x_test, y_test))

    return scores
from etl.etl import ETL
from matplotlib import pyplot as plt

# Script: plot the first 250 samples of infant "077"'s right_wrist_x series
# as captured after resampling and again after pooled preprocessing, then
# save the comparison to sma.png.

etl = ETL("/home/erlend/datasets", [128, 256, 512, 1024], size=16, random_seed=42)
etl.cache = False
etl.load("CIMA")

# Snapshot taken after resampling but before preprocess_pooled() runs.
infant = etl.cima["077"]
infant = etl.resample(infant)
before_sma = infant["data"]["right_wrist_x"][:250]

etl.preprocess_pooled()
after_sma = etl.cima["077"]["data"]["right_wrist_x"][:250]

# plt.figure() (the pyplot function) creates the figure that the plt.* calls
# below draw on. plt.Figure() would only instantiate a detached Figure that
# pyplot never uses.
fig = plt.figure()
plt.plot(before_sma, color="red", alpha=0.5)
plt.plot(after_sma, color="green", alpha=0.5)
plt.xlabel("Frame")
plt.ylabel("right_wrist_x")
# NOTE(review): the "SMA=3" label assumes ETL's default sma_window is 3; this
# script does not set it explicitly, so confirm against the ETL constructor.
plt.legend(["Raw data", "SMA=3"])
plt.savefig("sma.png")
def load_validation_set(data_path):
    """Load and preprocess the CIMA validation split.

    Args:
        data_path: Forwarded to ``ETL`` as its first positional argument.

    Returns:
        The ``cima`` attribute of the ETL instance after
        ``load("CIMA", validation=True)`` and ``preprocess_pooled()`` have
        been called.
    """
    window_sizes = [128, 256, 512, 1024]
    loader = ETL(data_path, window_sizes)

    loader.load("CIMA", validation=True)
    loader.preprocess_pooled()

    validation_data = loader.cima
    return validation_data