예제 #1
0
def data_extraction(path):
    """Load, window-segment, and length-align the three sensor streams at *path*.

    Parameters
    ----------
    path : str or path-like
        Location handed to ``data_loader`` for each sensor type
        (ACC, GYRO, SOUND).

    Returns
    -------
    tuple
        ``(acc, gyro, sound, window_length, True)`` on success — each stream
        truncated to the shortest segmented length so they stay in lockstep —
        or ``(None, None, None, None, False)`` when any stream produces no
        windows.
    """
    acc = window_segmentation(data_loader(path, sensor_type=ACC))
    gyro = window_segmentation(data_loader(path, sensor_type=GYRO))
    sound = window_segmentation(data_loader(path, sensor_type=SOUND))
    # min() takes any number of arguments — no need for two chained calls.
    window_length = min(len(acc), len(gyro), len(sound))
    if window_length == 0:
        # At least one sensor yielded no windows; signal failure explicitly.
        return None, None, None, None, False
    # Truncate every stream to the common length before returning.
    acc = acc[:window_length]
    gyro = gyro[:window_length]
    sound = sound[:window_length]
    return acc, gyro, sound, window_length, True
예제 #2
0
def perform_pca():
    """Scale the features and project them onto PCA components retaining
    99% of the training-set variance.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` where the feature frames are
        replaced by their PCA projections (columns ``pca_0 .. pca_{k-1}``).
    """
    X_train, y_train, X_test, y_test = preprocessing.data_loader()
    scaler = StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train),
                           index=X_train.index, columns=X_train.columns)
    # BUG FIX: the original called fit_transform on the test set, re-fitting
    # the scaler on test data (data leakage). The test set must be scaled
    # with the statistics learned from the training data only.
    X_test = pd.DataFrame(scaler.transform(X_test),
                          index=X_test.index, columns=X_test.columns)
    # PCA(0.99): keep enough components to explain 99% of the variance.
    pca = PCA(0.99)
    pca.fit(X_train)
    print(pca.components_.shape)
    columns = ['pca_%i' % i for i in range(pca.components_.shape[0])]
    X_train = pd.DataFrame(pca.transform(X_train), columns=columns,
                           index=X_train.index)
    X_test = pd.DataFrame(pca.transform(X_test), columns=columns,
                          index=X_test.index)
    return X_train, X_test, y_train, y_test
예제 #3
0
def perform_lda():
    """Scale the features and project them onto LDA discriminant axes.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` where the feature frames are
        replaced by their LDA projections (columns ``lda_0 .. lda_{k-1}``).
    """
    X_train, y_train, X_test, y_test = preprocessing.data_loader()
    scaler = StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train),
                           index=X_train.index, columns=X_train.columns)
    # BUG FIX: the original called fit_transform on the test set, re-fitting
    # the scaler on test data (data leakage). The test set must be scaled
    # with the statistics learned from the training data only.
    X_test = pd.DataFrame(scaler.transform(X_test),
                          index=X_test.index, columns=X_test.columns)
    # LDA is supervised: it is fit on the training labels only.
    lda = LinearDiscriminantAnalysis()
    lda.fit(X_train, y_train)
    columns = ['lda_%i' % i
               for i in range(lda.explained_variance_ratio_.shape[0])]
    X_train = pd.DataFrame(lda.transform(X_train), columns=columns,
                           index=X_train.index)
    X_test = pd.DataFrame(lda.transform(X_test), columns=columns,
                          index=X_test.index)
    print(X_train.head())
    print(X_test.head())
    return X_train, X_test, y_train, y_test
예제 #4
0
def data_extraction(path):
    """Load the accelerometer stream at *path*, segment it into windows,
    and return the windowed data together with its window count."""
    windows = window_segmentation(data_loader(path, sensor_type=ACC))
    return windows, windows.shape[0]
예제 #5
0
if __name__ == "__main__":

    # read train and test csv file
    train = pd.read_csv("./data/train.csv")
    # test = pd.read_csv("./data/test.csv")

    # preprocess the data
    train = preprocess(train)

    print(train.shape)

    # train validation split
    train, validation, _, _ = train_test_split(train, train, test_size=0.2, random_state=42)

    # construct dataloader
    train_data_loader = data_loader(train, tokenizer, MAX_LEN, BATCH_SIZE)
    val_data_loader = data_loader(validation, tokenizer, MAX_LEN, BATCH_SIZE)
    # test_data_loader = data_loader(test, tokenizer, MAX_LEN, BATCH_SIZE)

    # construct model
    model = SentimentClassifier(n_classes = 3)
    model = model.to(device)

    # define AdamW optimizer from the tranformers package
    optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)

    # total steps during training process
    total_steps = len(train_data_loader) * EPOCHS

    # use a warm-up scheduler as suggested in the paper
    scheduler = get_linear_schedule_with_warmup(