Esempio n. 1
0
def nn_SCM():
    """Train and evaluate the single-classifier neural network model.

    Loads the feature data, encodes the target with
    ``prepare_multiclass_target``, fits one ``MLPClassifier`` on the training
    part of an 80/20 split and prints the training time, the
    cross-validation scores and the final metrics for the held-out part.
    Nothing is returned.
    """
    features, raw_target = load_features_data(correlation_threshold=0.05)
    target = prepare_multiclass_target(raw_target)
    train_features, test_features, train_target, test_target = train_test_split(
        features, target, test_size=0.2, random_state=42, shuffle=True
    )

    # The two partitions are normalized by two separate calls.
    train_features = normalize_X(train_features)
    test_features = normalize_X(test_features)

    started_at = time.time()

    network = MLPClassifier(
        hidden_layer_sizes=(100, 100, 50),
        activation='relu',
        solver='adam',
        verbose=False,
        max_iter=1000,
        alpha=0.0001,
        batch_size=100,
        warm_start=True,
    )
    network = network.fit(train_features, train_target)
    print(f"trained in {time.time() - started_at} sec")
    predicted = network.predict(test_features)

    # NOTE(review): cross-validation is given the full, un-normalized feature
    # matrix while the model above was fitted on normalized data -- confirm
    # this is intended.
    cv_scores = get_scores_for_cross_val(network, features, target)
    print('-'*15, 'CROSS VALIDATION SCORES', '-'*15)
    for metric in cv_scores:
        values = cv_scores[metric]
        print(metric, ": ", values, " Average:", np.mean(values))

    final_scores = get_final_metrics(test_target, predicted)
    print('-'*15, 'FINAL SCORES SINGLE CLASSIFIER MODEL NEURAL NETWORK', '-'*15)
    for metric in final_scores:
        print(metric, ":\n", final_scores[metric])
def nn_EWCM_simple():
    """Run the simple two-stage ensemble: WALK vs. rest, then multiclass.

    Stage one separates the test trips into those predicted as ``'WALK'`` and
    the rest using ``make_binary_classification``. Stage two labels the
    remaining test trips with ``make_multiclass_classification``, trained on
    the non-WALK training trips. Both partial results are merged, ordered by
    index and scored against the index-sorted original test target. The
    scores are printed; nothing is returned.
    """
    # Local import so the block does not depend on a module-level alias.
    import pandas as pd

    X, y_orig = load_features_data(correlation_threshold=0.05)
    X_train_orig, X_test_orig, y_train_orig, y_test_orig = train_test_split(
        X, y_orig, test_size=0.2, random_state=42, shuffle=True)

    start = time.time()
    print('-'*15, 'SEPARATIONG WALK AND NON-WALK', '-'*15)
    _, X_train_false, _, y_train_false, X_test_true, X_test_false, _, _ = \
        make_binary_classification(X_train_orig, X_test_orig, y_train_orig,
                                   y_test_orig, ['WALK'])

    # Test trips classified as WALK. ``assign`` returns a new frame, so the
    # frame handed back by the binary stage is not modified in place.
    walk_part = X_test_true.assign(mode='WALK')

    print('-'*15, 'SEPARATING ALL OTHER CLASSES', '-'*15)
    X_test_false = X_test_false.sort_index()

    y_pred = make_multiclass_classification(X_train_false, X_test_false, y_train_false)
    print(f"trained in {time.time() - start} sec")

    other_part = X_test_false.assign(mode=y_pred)

    # Merge the predictions of both stages. ``DataFrame.append`` no longer
    # exists in pandas >= 2.0, so ``pd.concat`` is used instead.
    res = pd.concat([walk_part, other_part]).sort_index()
    y_test_orig = y_test_orig.sort_index()
    y_pred = res['mode']
    scores = get_final_metrics(y_test_orig, y_pred)
    print('-' * 15, 'FINAL SCORES ENSEMBLE CLASSIFIER MODEL NEURAL NETWORK (SIMPLE)', '-' * 15)
    for score in scores:
        print(score, ":\n", scores[score])
Esempio n. 3
0
def ensemble_classifier_resuts(training_data: np.array, labels: np.array) -> tuple:
    """Evaluate a two-stage decision-tree ensemble (WALK vs. rest, then rest).

    A decision tree is first trained on a binary WALK / non-WALK target. Every
    sample of ``training_data`` that this tree predicts as WALK (label ``1``)
    is then removed, and the same tree object is re-fitted on the multiclass
    target of the remaining samples.

    NOTE(review): despite the ``np.array`` annotations, both arguments are
    used through the pandas API (``.loc`` / ``DataFrame.copy``), so pandas
    objects with matching row order are expected -- confirm against callers.

    :param training_data: feature table, one row per sample.
    :param labels: target labels aligned row by row with ``training_data``.
    :return: ``(binary_metrics, non_walk_metrics)`` as produced by
        ``get_final_metrics`` for the first and second stage.
    """
    x = training_data
    y = prepare_binary_target(pd.DataFrame.copy(labels), ['WALK'])

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

    dtc = DecisionTreeClassifier(criterion='entropy')

    start = time.time()
    dtc.fit(x_train, y_train)
    print(f"trained in {time.time() - start} sec")

    y_pred = dtc.predict(x_test)

    binary_classifying_results = get_final_metrics(y_test, y_pred)

    # Classify the whole data set in one batch call instead of one predict()
    # call per row; label 1 marks a sample predicted as WALK.
    is_walk = np.asarray(dtc.predict(x)) == 1

    # Keep only the samples not predicted as WALK. A positional boolean mask
    # replaces the former nested-list lookup ``x.index[[...]]``.
    non_walk_data_to_learn = x.loc[~is_walk]
    non_walk_labels = pd.DataFrame.copy(labels).loc[~is_walk].copy()

    # train on the non walk classified data
    # labels := {'TRAM', 'TRAIN', 'METRO', 'CAR', 'BUS' 'BICYCLE'}
    non_walk_labels = prepare_multiclass_target(non_walk_labels)

    x_train, x_test, y_train, y_test = train_test_split(
        non_walk_data_to_learn, non_walk_labels, test_size=0.2, random_state=1)

    start = time.time()
    dtc.fit(x_train, y_train)
    print(f"trained in {time.time() - start} sec")

    y_pred = dtc.predict(x_test)

    non_walk_predictions = get_final_metrics(y_test, y_pred)

    return binary_classifying_results, non_walk_predictions
Esempio n. 4
0
def single_classifier(X: pd.DataFrame, y: np.ndarray, kernel_name="linear"):
    """Train one SVC model on all classes and print its evaluation.

    The model object is taken from ``models_dict[kernel_name]``, fitted on the
    training part of an 80/20 split and evaluated on the rest. The confusion
    matrix, the metrics from ``get_final_metrics`` and the cross-validation
    scores from ``get_scores_for_cross_val`` are printed. Nothing is returned.

    :param X: feature table.
    :param y: target values; passed through ``prepare_multiclass_target``.
    :param kernel_name: key into ``models_dict`` selecting the model.
    :raises KeyError: if ``kernel_name`` is not a key of ``models_dict``
        (assuming ``models_dict`` is a plain dict).
    """
    print("*** SINGLE CLASSIFIER ***")
    print(f"\tkernel: {kernel_name}")

    # converts string target values to numbers
    y = prepare_multiclass_target(y)

    # retrieve model object from models_dict
    svc_model = models_dict[kernel_name]

    # split data
    x_train, x_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42,
                                                        shuffle=True)
    print(f"shape x_train: {x_train.shape}")
    print(f"shape y_train: {y_train.shape}")
    print(f"shape x_test: {x_test.shape}")
    print(f"shape y_test: {y_test.shape}")

    print()
    print("*" * 15)
    print(f"MODEL: svc-{kernel_name}")

    start = time.time()

    # train model
    svc_model.fit(x_train, y_train)

    # predict test_data
    y_pred = svc_model.predict(x_test)

    print(f"trained and predicted in {time.time() - start} sec")
    print()

    # confusion matrix
    # The label set is left to scikit-learn (every class occurring in y_test
    # or y_pred). Restricting it to the predicted classes would silently drop
    # the rows of true classes the model never predicts.
    print("confusion matrix: ")
    c_matrix = confusion_matrix(y_test, y_pred)
    print(c_matrix)
    print()

    # print metrics
    print("scoring in all metrics: ")
    scoring = get_final_metrics(y_test, y_pred)
    print(scoring)
    print()

    # cross validation
    print("scores in cross validation:")
    scores = get_scores_for_cross_val(svc_model, X, y)
    for score in scores:
        print(score, ": ", scores[score])
    print()
def make_binary_classification(X_train_orig, X_test_orig, y_train_orig, y_test_orig, mode):
    """Train a binary MLP for ``mode`` and split train and test sets by it.

    The target is binarized with ``prepare_binary_target``, the features are
    normalized with ``normalize_X`` and an ``MLPClassifier`` is
    cross-validated, fitted and evaluated (scores are printed).

    The two sets are then split differently:

    * the TEST set is split by the model's *prediction* (label ``1`` is
      "true");
    * the TRAIN set is split by the *actual* label in
      ``y_train_orig['mode']``.

    The returned frames are taken from the original (un-normalized) inputs.
    The normalized frames are assumed to keep the index of the originals,
    because the index is what links predictions back to the original rows.

    :param X_train_orig: training features (DataFrame).
    :param X_test_orig: test features (DataFrame).
    :param y_train_orig: training target with a ``'mode'`` column.
    :param y_test_orig: test target, indexed like ``X_test_orig``.
    :param mode: list of mode names that make up the "true" class.
    :return: ``(X_train_true, X_train_false, y_train_true, y_train_false,
        X_test_true, X_test_false, y_test_true, y_test_false)``.
    """
    y_train = prepare_binary_target(y_train_orig, mode)
    y_test = prepare_binary_target(y_test_orig, mode)

    # normalize X
    X_train = normalize_X(X_train_orig)
    X_test = normalize_X(X_test_orig)

    model = MLPClassifier(hidden_layer_sizes=(80, 60), activation='relu', solver='adam', verbose=False, max_iter=1000,
                          alpha=0.0001, batch_size=100, warm_start=True)

    # cross validation scores
    scores = get_scores_for_cross_val(model, X_train, y_train)
    for score in scores:
        print(score, ": ", scores[score], " Average:", np.mean(scores[score]))

    model = model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    scores = get_final_metrics(y_test, y_pred)
    print('-' * 15, 'SCORES BINARY CLASSIFIER WALK/NON-WALK', '-' * 15)
    for score in scores:
        print(score, ":\n", scores[score])

    # Index labels of the test trips predicted as "true" (label 1). The batch
    # predictions computed above are reused instead of calling predict()
    # again once per row.
    predicted_true = X_test.index[np.asarray(y_pred) == 1]

    # separate "true" and "false" trips
    X_test_false = X_test_orig.drop(predicted_true)
    y_test_false = y_test_orig.drop(predicted_true)
    X_test_true = X_test_orig.loc[predicted_true]
    y_test_true = y_test_orig.loc[predicted_true]

    # Index labels of the training trips whose actual label is one of the
    # requested modes.
    train_modes = y_train_orig.loc[X_train.index, 'mode']
    labelled_true = train_modes.index[train_modes.isin(mode)]

    # separate "true" and "false" trips
    X_train_false = X_train_orig.drop(labelled_true)
    y_train_false = y_train_orig.drop(labelled_true)
    X_train_true = X_train_orig.loc[labelled_true]
    y_train_true = y_train_orig.loc[labelled_true]

    return X_train_true, X_train_false, y_train_true, y_train_false, X_test_true, \
           X_test_false, y_test_true, y_test_false
def single_classifier_results(training_data: np.array,
                              labels: np.array) -> dict:
    """Fit one gini decision tree on all classes and return its metrics.

    A copy of ``labels`` is encoded with ``prepare_multiclass_target``, the
    data is split 80/20 with a fixed seed, the tree is trained on the larger
    part (training time is printed) and the predictions for the smaller part
    are scored with ``get_final_metrics``.

    :param training_data: feature table, one row per sample.
    :param labels: target labels aligned with ``training_data``.
    :return: whatever ``get_final_metrics`` produces for the held-out part.
    """
    encoded_labels = prepare_multiclass_target(pd.DataFrame.copy(labels))

    train_features, test_features, train_labels, test_labels = train_test_split(
        training_data, encoded_labels, test_size=0.2, random_state=1
    )

    tree = DecisionTreeClassifier(criterion='gini')

    started_at = time.time()
    tree.fit(train_features, train_labels)
    print(f"trained in {time.time() - started_at} sec")

    # measure accuracy on the held-out samples
    predicted = tree.predict(test_features)
    return get_final_metrics(test_labels, predicted)
Esempio n. 7
0
def ensemble_classifier(X: pd.DataFrame, y: np.ndarray, kernel_name="linear"):
    """Run a cascade of one-vs-rest SVC classifiers, one per transport mode.

    For every entry of ``mode_list`` the model ``models_dict[kernel_name]`` is
    re-fitted on the full training split with a binary target for that mode.
    Its confusion matrix and metrics on the remaining test samples are
    printed. The test samples it predicts as that mode are then removed, so
    later modes are evaluated on what is left. The training data is never
    reduced. Nothing is returned.

    :param X: feature table.
    :param y: target values; must support ``.drop`` by index label.
    :param kernel_name: key into ``models_dict`` selecting the model.
    """
    print("*** ENSEMBLE CLASSIFIER ***")
    print(f"\tkernel: {kernel_name}")

    # split data
    x_train, x_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42,
                                                        shuffle=True)

    print(f"\tshape x_train: {x_train.shape}")
    print(f"\tshape y_train: {y_train.shape}")

    start = time.time()

    # consecutive binary classification
    for mode in mode_list:
        # Earlier modes may already have claimed every test sample; scikit-learn
        # estimators reject an input with zero samples, so stop here.
        if len(x_test) == 0:
            print("\tno unclassified test samples left")
            break

        print()
        print(f"*** {mode} ***")

        # prepare target vector (the matching mode is set to 1 all others to 0)
        y_test_binary = prepare_binary_target(y_test, [mode])
        y_train_binary = prepare_binary_target(y_train, [mode])

        # retrieve model object from models_dict
        svc_model = models_dict[kernel_name]

        print(f"\tshape x_test: {x_test.shape}")
        print(f"\tshape y_test: {y_test_binary.shape}")
        print()

        # train model
        svc_model.fit(x_train, y_train_binary)

        # compute confusion matrix:
        y_pred = svc_model.predict(x_test)
        c_matrix = confusion_matrix(y_test_binary, y_pred)
        print("confusion matrix: ")
        print(c_matrix)
        print()

        # print metrics
        print("scoring in all metrics: ")
        scoring = get_final_metrics(y_test_binary, y_pred)
        for key, value in scoring.items():
            print(f"\t{key}: {value}")
        print()

        # Remove the samples classified as the current mode from the TEST data
        # only. The batch predictions from above are reused instead of
        # predicting every row a second time.
        classified_index = x_test.index[np.asarray(y_pred) == 1]
        y_test = y_test.drop(classified_index)
        x_test = x_test.drop(classified_index)
        print("*****" * 10)

    print(f"trained and predicted in {time.time() - start} sec")
    print()
Esempio n. 8
0
def nn_EWCM_umbrella():
    """Run the hierarchical ("umbrella") ensemble of binary classifiers.

    Cascade applied to the test split (every stage is a call to
    ``make_binary_classification`` with the full training split):

    1. WALK vs. non-WALK.
    2. The non-WALK trips are split into RAIL (TRAIN, METRO, TRAM) and ROAD.
    3. The RAIL trips go through one binary classifier per rail mode; trips
       claimed by a classifier get that mode.
    4. RAIL trips that no rail classifier claimed are added to the ROAD trips,
       which then go through one binary classifier per road mode.
    5. Whatever is still unclaimed is labelled ``'WALK'``.

    The combined predictions are sorted by index and scored against the
    index-sorted original test target. Scores are printed; nothing is
    returned.

    NOTE(review): a stage whose input test set is empty is still passed to
    ``make_binary_classification`` -- presumably that fails; confirm whether a
    guard is needed.
    """
    # Local import so the block does not depend on a module-level alias.
    import pandas as pd

    X, y_orig = load_features_data(correlation_threshold=0.05)
    X_train_orig, X_test_orig, y_train_orig, y_test_orig = train_test_split(
        X, y_orig, test_size=0.2, random_state=42, shuffle=True)
    start = time.time()

    print('-'*15, 'SEPARATIONG WALK AND NON-WALK', '-'*15)
    _, _, _, _, X_test_true, X_test_false, _, y_test_false = make_binary_classification(
        X_train_orig, X_test_orig, y_train_orig, y_test_orig, ['WALK'])

    # Labelled pieces of the test set, concatenated once at the end.
    # ``DataFrame.append`` no longer exists in pandas >= 2.0, and a single
    # ``pd.concat`` also avoids re-copying the growing result in every step.
    classified_parts = [X_test_true.assign(mode='WALK')]

    # separate rail and road
    print('-'*15, 'SEPARATIONG RAIL AND ROAD', '-'*15)
    rail = ['TRAIN', 'METRO', 'TRAM']
    road = ['BICYCLE', 'CAR', 'BUS']
    _, _, _, _, X_test_rail, X_test_road, y_test_rail, y_test_road = make_binary_classification(
        X_train_orig, X_test_false, y_train_orig, y_test_false, rail)

    # Peel the rail modes off the trips predicted as RAIL, one at a time.
    X_test_false, y_test_false = X_test_rail, y_test_rail
    for mode in rail:
        print('-' * 15, 'SEPARATIONG ', mode.upper() +' AND NON-', mode.upper(), '-' * 15)
        _, _, _, _, X_test_true, X_test_false, _, y_test_false = make_binary_classification(
            X_train_orig, X_test_false, y_train_orig, y_test_false, [mode])
        # add classified trips to result test set
        classified_parts.append(X_test_true.assign(mode=mode))

    # Rail trips claimed by no rail classifier get a second chance as ROAD
    # trips (road trips first, leftovers after them).
    X_test_false = pd.concat([X_test_road, X_test_false])
    y_test_false = pd.concat([y_test_road, y_test_false])
    for mode in road:
        print('-' * 15, 'SEPARATIONG ', mode.upper() +' AND NON-', mode.upper(), '-' * 15)
        _, _, _, _, X_test_true, X_test_false, _, y_test_false = make_binary_classification(
            X_train_orig, X_test_false, y_train_orig, y_test_false, [mode])
        # add classified trips to result test set
        classified_parts.append(X_test_true.assign(mode=mode))

    # set all trips which were not classified to walk
    classified_parts.append(X_test_false.assign(mode='WALK'))
    res = pd.concat(classified_parts)

    y_pred = res['mode'].sort_index()
    y_test_orig = y_test_orig.sort_index()

    print(f"trained in {time.time() - start} sec")

    scores = get_final_metrics(y_test_orig, y_pred)
    print('-'*15, 'FINAL SCORES ENSEMBLE CLASSIFIER NEURAL NETWORK (UMBRELLA)', '-'*15)
    for score in scores:
        print(score, ":\n", scores[score])