def data_extraction(path):
    # `data_loader`, `window_segmentation`, and the sensor-type constants
    # (ACC, GYRO, SOUND) are defined elsewhere in this project.
    acc = data_loader(path, sensor_type=ACC)
    gyro = data_loader(path, sensor_type=GYRO)
    sound = data_loader(path, sensor_type=SOUND)

    # Segment each stream into fixed-size windows.
    acc = window_segmentation(acc)
    gyro = window_segmentation(gyro)
    sound = window_segmentation(sound)

    # Align the three streams by truncating to the shortest window count.
    window_length = min(len(acc), len(gyro), len(sound))
    if window_length == 0:
        return None, None, None, None, False

    acc = acc[:window_length]
    gyro = gyro[:window_length]
    sound = sound[:window_length]
    return acc, gyro, sound, window_length, True
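# A minimal usage sketch for data_extraction(); the recording path is
# hypothetical. The boolean flag signals whether the three sensor streams
# could be aligned to at least one common window.
acc, gyro, sound, n_windows, ok = data_extraction("./recordings/session_01")
if ok:
    # The streams now share the same window count, so they can be paired
    # window-by-window for feature extraction.
    print("aligned windows per sensor:", n_windows)
else:
    print("no complete windows found; skipping this recording")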
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

import preprocessing


def perform_pca():
    X_train, y_train, X_test, y_test = preprocessing.data_loader()

    # Standardize features; fit the scaler on the training set only and
    # reuse it on the test set to avoid leaking test statistics.
    scaler = StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train),
                           index=X_train.index, columns=X_train.columns)
    X_test = pd.DataFrame(scaler.transform(X_test),
                          index=X_test.index, columns=X_test.columns)

    # Keep enough principal components to explain 99% of the variance.
    pca = PCA(n_components=0.99)
    pca.fit(X_train)
    print(pca.components_.shape)

    columns = ['pca_%i' % i for i in range(pca.components_.shape[0])]
    X_train = pd.DataFrame(pca.transform(X_train), columns=columns, index=X_train.index)
    X_test = pd.DataFrame(pca.transform(X_test), columns=columns, index=X_test.index)
    return X_train, X_test, y_train, y_test
def perform_lda():
    X_train, y_train, X_test, y_test = preprocessing.data_loader()

    # Same scaling scheme as perform_pca(): fit on train, transform test.
    scaler = StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train),
                           index=X_train.index, columns=X_train.columns)
    X_test = pd.DataFrame(scaler.transform(X_test),
                          index=X_test.index, columns=X_test.columns)

    # LDA is supervised, so it is fitted with the training labels.
    lda = LinearDiscriminantAnalysis()
    lda.fit(X_train, y_train)

    columns = ['lda_%i' % i for i in range(lda.explained_variance_ratio_.shape[0])]
    X_train = pd.DataFrame(lda.transform(X_train), columns=columns, index=X_train.index)
    X_test = pd.DataFrame(lda.transform(X_test), columns=columns, index=X_test.index)
    print(X_train.head())
    print(X_test.head())
    return X_train, X_test, y_train, y_test
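# A small sketch of how the two reduction routines above might be compared,
# e.g. by training the same classifier on each projection. The kNN choice
# here is an illustration, not part of the original pipeline.
from sklearn.neighbors import KNeighborsClassifier

for reducer in (perform_pca, perform_lda):
    X_train, X_test, y_train, y_test = reducer()
    clf = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
    print(reducer.__name__, "accuracy:", clf.score(X_test, y_test))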
def data_extraction(path):
    # Accelerometer-only variant: load and window-segment a single stream.
    acc = data_loader(path, sensor_type=ACC)
    acc = window_segmentation(acc)
    return acc, acc.shape[0]
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AdamW, get_linear_schedule_with_warmup

# `preprocess`, `data_loader`, `SentimentClassifier`, `tokenizer`, `device`,
# `MAX_LEN`, `BATCH_SIZE`, and `EPOCHS` are defined elsewhere in this project.

if __name__ == "__main__":
    # read train and test csv files
    train = pd.read_csv("./data/train.csv")
    # test = pd.read_csv("./data/test.csv")

    # preprocess the data
    train = preprocess(train)
    print(train.shape)

    # train/validation split
    train, validation = train_test_split(train, test_size=0.2, random_state=42)

    # construct dataloaders
    train_data_loader = data_loader(train, tokenizer, MAX_LEN, BATCH_SIZE)
    val_data_loader = data_loader(validation, tokenizer, MAX_LEN, BATCH_SIZE)
    # test_data_loader = data_loader(test, tokenizer, MAX_LEN, BATCH_SIZE)

    # construct model
    model = SentimentClassifier(n_classes=3)
    model = model.to(device)

    # define AdamW optimizer from the transformers package
    optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)

    # total steps during the training process
    total_steps = len(train_data_loader) * EPOCHS

    # use a warm-up scheduler as suggested in the paper
    # (the call was cut off in the original; the warm-up step count of 0
    # is an assumed common default)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps,
    )
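    # A sketch of the epoch loop that would follow the scheduler setup. The
    # batch keys ("input_ids", "attention_mask", "targets") are assumptions
    # about what data_loader yields; the original script ends before this
    # point.
    import torch.nn as nn

    loss_fn = nn.CrossEntropyLoss().to(device)
    for epoch in range(EPOCHS):
        model.train()
        for batch in train_data_loader:
            outputs = model(
                input_ids=batch["input_ids"].to(device),
                attention_mask=batch["attention_mask"].to(device),
            )
            loss = loss_fn(outputs, batch["targets"].to(device))
            loss.backward()
            # step the optimizer, then the warm-up scheduler, then reset grads
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()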