Example #1
def dl_veh_type(train_df, test_df, opt, vehicle_or_not_train_opt,
                vehicle_type_train_opt, train_test_opt, features):
    # train the vehicle type model separately
    evaluation.init_write(
        opt,
        [vehicle_or_not_train_opt, vehicle_type_train_opt, train_test_opt],
        [features['VEHICLE_OR_NOT_FEATURES'], features['VEHICLE_TYPE']],
        train_df, test_df)
    print(evaluation.evaluation_report.write)
    if vehicle_type_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_type_train_opt['random_seed'])
    vehicle_type_train_win_df = train_df.copy()
    vehicle_type_test_win_df = test_df.copy()

    vehicle_type_index = preprocessing.get_feature_idx(
        features['VEHICLE_TYPE'], features['ALL_FEATURES'])

    vehicle_type_test_win_df = vehicle_type_test_win_df[
        (vehicle_type_test_win_df[opt['test_label_type']] == 4)
        | (vehicle_type_test_win_df[opt['test_label_type']] == 3) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 2)]

    vehicle_type_train_win_df = vehicle_type_train_win_df[
        (vehicle_type_train_win_df[opt['train_label_type']] == 2)
        | (vehicle_type_train_win_df[opt['train_label_type']] == 3)
        | (vehicle_type_train_win_df[opt['train_label_type']] == 4)]

    preprocessing.reassign_label(vehicle_type_train_win_df,
                                 vehicle_type_test_win_df,
                                 [[2, 0], [3, 1], [4, 2]])
    # features_test, labels_test = preprocessing.balance_dataset(
    #     vehicle_type_test_win_df.iloc[:, vehicle_type_index],
    #     vehicle_type_test_win_df[opt['test_label_type']])
    logging.info("Start to train vehicle_type model")
    train_models.train_dl_veh_type_model(
        vehicle_type_train_win_df.iloc[:, vehicle_type_index],
        vehicle_type_train_win_df[opt['train_label_type']],
        vehicle_type_train_opt,
        vehicle_type_test_win_df.iloc[:, vehicle_type_index],
        vehicle_type_test_win_df[opt['test_label_type']])
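The example above filters the windows down to raw labels 2/3/4 and remaps them to 0/1/2 before training. Below is a minimal sketch of what such a remapping helper could look like; reassign_label_sketch and its explicit label_col argument are illustrative assumptions, not the actual signature of preprocessing.reassign_label (which receives the train and test frames together).

import pandas as pd

# Hypothetical remapping helper: the mapping is a list of [old_label, new_label]
# pairs, as in the examples; values not listed in the mapping stay unchanged.
def reassign_label_sketch(df, label_col, mapping):
    lookup = {old: new for old, new in mapping}
    df[label_col] = df[label_col].map(lambda v: lookup.get(v, v))
    return df

demo = pd.DataFrame({'win_label': [2, 3, 4, 3]})
reassign_label_sketch(demo, 'win_label', [[2, 0], [3, 1], [4, 2]])
print(demo['win_label'].tolist())  # [0, 1, 2, 1]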
Example #2
def ml_one_train_test(ml_opt, train_df, test_df, opt, features):
    # Use ML to train the All-In-One model

    evaluation.init_write(
        opt, None,
        [features['VEHICLE_OR_NOT_FEATURES'], features['VEHICLE_TYPE']],
        train_df, test_df)

    # ~~~~~~~~~~~~~~ train All-In-One (one_model) model ~~~~~~~~~~~~~~~~
    one_model_train_win_df = train_df.copy()
    one_model_test_win_df = test_df.copy()

    one_model_index = preprocessing.get_feature_idx(features['ONE_MODEL'],
                                                    features['ALL_FEATURES'])
    preprocessing.reassign_label(one_model_train_win_df, one_model_test_win_df,
                                 [[1, 0], [2, 1], [3, 2], [4, 3], [5, 0]])
    logging.info("Start to train ml one model")
    start_time = time.time()
    one_model = train_models.train_ml_model(
        np.array(one_model_train_win_df.iloc[:, one_model_index]),
        np.array(one_model_train_win_df[opt['train_label_type']]), ml_opt)
    logging.info("Finished to train ml one_model model")
    print("it took", time.time() - start_time, "seconds.")
    logging.info("Start to evaluate ml one model")
    valid_all_based = preprocessing.remove_mix(one_model_test_win_df,
                                               'all_based_win_label')
    evaluation.evaluate_single_ml_model(
        one_model, np.array(valid_all_based.iloc[:, one_model_index]),
        np.array(valid_all_based['all_based_win_label']),
        ['not vehicle', 'mrt', 'bus', 'car'], opt['folder_name'])
    evaluation.evaluate_single_ml_model(
        one_model, np.array(one_model_test_win_df.iloc[:, one_model_index]),
        np.array(one_model_test_win_df['last_based_win_label']),
        ['not vehicle', 'mrt', 'bus', 'car'], opt['folder_name'])

    evaluation.save_write(opt['folder_name'])
    # print(evaluation.evaluation_report.write)
    del one_model_test_win_df, one_model_train_win_df, one_model_index, one_model
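preprocessing.get_feature_idx is always used here together with .iloc, which suggests it returns the positional indices of the selected feature names within the full, ordered feature list. A rough sketch under that assumption follows; the feature names and their order below are made up for illustration, and the real helper may behave differently.

# Hypothetical index lookup: position of each selected feature name in the
# full feature list, so that df.iloc[:, idx] slices exactly those columns.
def get_feature_idx_sketch(selected_features, all_features):
    return [all_features.index(name) for name in selected_features]

print(get_feature_idx_sketch(['TIME_DELTA', 'WLATITUDE'],
                             ['TIME_SGT', 'TIME_DELTA', 'DISTANCE_DELTA', 'WLATITUDE']))
# [1, 3]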
Example #3
def dl_train_test(train_df, test_df, opt, vehicle_or_not_train_opt,
                  vehicle_type_train_opt, train_test_opt, features):
    # Use Deep Learning to train hierarchical model
    evaluation.init_write(
        opt,
        [vehicle_or_not_train_opt, vehicle_type_train_opt, train_test_opt],
        [features['VEHICLE_OR_NOT_FEATURES'], features['VEHICLE_TYPE']],
        train_df, test_df)
    print(evaluation.evaluation_report.write)
    # ~~~~~~~~~~~~~~ train vehicle/Non-vehicle model ~~~~~~~~~~~~~~~~
    if vehicle_or_not_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_or_not_train_opt['random_seed'])
    vehicle_or_not_train_win_df = train_df.copy()
    vehicle_or_not_test_win_df = test_df.copy()

    vehicle_or_not_index = preprocessing.get_feature_idx(
        features['VEHICLE_OR_NOT_FEATURES'], features['ALL_FEATURES'])
    preprocessing.reassign_label(vehicle_or_not_train_win_df,
                                 vehicle_or_not_test_win_df,
                                 [[1, 0], [2, 1], [3, 1], [4, 1], [5, 0]])
    logging.info("Start to train vehicle_or_not model")
    # features_test, labels_test = preprocessing.balance_dataset(
    #     vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index],
    #     vehicle_or_not_test_win_df[opt['test_label_type']])
    vehicle_or_not_model = train_models.train_dl_veh_or_not_model(
        vehicle_or_not_train_win_df.iloc[:, vehicle_or_not_index],
        vehicle_or_not_train_win_df[opt['train_label_type']],
        vehicle_or_not_train_opt,
        vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index],
        vehicle_or_not_test_win_df[opt['test_label_type']])
    logging.info("Start to test vehicle_or_not model")
    evaluation.evaluate_single_model(
        vehicle_or_not_model,
        opt['folder_name'],
        'vehicle_or_not',
        np.array(vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index]),
        np.array(vehicle_or_not_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~ train vehicle_type_model ~~~~~~~~~~~~~~~~
    if vehicle_type_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_type_train_opt['random_seed'])
    vehicle_type_train_win_df = train_df.copy()
    vehicle_type_test_win_df = test_df.copy()

    vehicle_type_index = preprocessing.get_feature_idx(
        features['VEHICLE_TYPE'], features['ALL_FEATURES'])

    vehicle_type_test_win_df = vehicle_type_test_win_df[
        (vehicle_type_test_win_df[opt['test_label_type']] == 4)
        | (vehicle_type_test_win_df[opt['test_label_type']] == 3) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 2)]

    vehicle_type_train_win_df = vehicle_type_train_win_df[
        (vehicle_type_train_win_df[opt['train_label_type']] == 2)
        | (vehicle_type_train_win_df[opt['train_label_type']] == 3)
        | (vehicle_type_train_win_df[opt['train_label_type']] == 4)]

    preprocessing.reassign_label(vehicle_type_train_win_df,
                                 vehicle_type_test_win_df,
                                 [[2, 0], [3, 1], [4, 2]])
    # features_test, labels_test = preprocessing.balance_dataset(
    #     vehicle_type_test_win_df.iloc[:, vehicle_type_index],
    #     vehicle_type_test_win_df[opt['test_label_type']])
    logging.info("Start to train vehicle_type model")
    vehicle_type_model = train_models.train_dl_veh_type_model(
        vehicle_type_train_win_df.iloc[:, vehicle_type_index],
        vehicle_type_train_win_df[opt['train_label_type']],
        vehicle_type_train_opt,
        vehicle_type_test_win_df.iloc[:, vehicle_type_index],
        vehicle_type_test_win_df[opt['test_label_type']])

    logging.info("Start to test vehicle_type model")
    if vehicle_type_train_opt['middle_output'] is True:
        evaluation.evaluate_single_ml_model(
            vehicle_type_model,
            np.array(vehicle_type_test_win_df.iloc[:, vehicle_type_index]),
            np.array(vehicle_type_test_win_df[opt['test_label_type']]),
            ['mrt', 'bus', 'car'], opt['folder_name'])
    else:
        evaluation.evaluate_single_model(
            vehicle_type_model,
            opt['folder_name'],
            'vehicle_type',
            np.array(vehicle_type_test_win_df.iloc[:, vehicle_type_index]),
            np.array(vehicle_type_test_win_df[opt['test_label_type']]),
            save_model=False)
    # print(evaluation.evaluation_report.write)

    # ~~~~~~~~~~~~~~~~~ get overall result ~~~~~~~~~~~~~~~~~~~
    overall_test_win_df = test_df.copy()
    valid_all_based = preprocessing.remove_mix(overall_test_win_df,
                                               'all_based_win_label')
    overall_result_label = evaluation.evaluate_overall_manual_2(
        vehicle_or_not_model, vehicle_type_model, valid_all_based,
        valid_all_based['all_based_win_label'], vehicle_or_not_index,
        vehicle_type_index, opt['smooth_overall_result'])
    overall_result_label = evaluation.evaluate_overall_manual_2(
        vehicle_or_not_model, vehicle_type_model, overall_test_win_df,
        overall_test_win_df['last_based_win_label'], vehicle_or_not_index,
        vehicle_type_index, opt['smooth_overall_result'])
    # ~~~~~~~~~~~~~~~~~ Save predicted result into csv for visualization ~~~~~~~~~~
    evaluation.save_predicted_result_in_csv(overall_result_label,
                                            overall_test_win_df,
                                            opt['folder_name'], 'overall',
                                            opt['test_label_type'])
    evaluation.save_write(opt['folder_name'])

    del overall_test_win_df, vehicle_or_not_test_win_df, vehicle_or_not_train_win_df, vehicle_or_not_index, \
        vehicle_type_model, vehicle_type_train_win_df, vehicle_type_test_win_df, \
        vehicle_type_index
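Conceptually, the overall evaluation combines the binary vehicle_or_not prediction with the three-class vehicle_type prediction. The sketch below shows one way such a cascade could be merged into a single label per window (0 = not vehicle, 1 = mrt, 2 = bus, 3 = car, matching the class names used elsewhere in these examples); it only illustrates the assumed semantics and is not the actual evaluation.evaluate_overall_manual_2.

import numpy as np

# Windows predicted as "not vehicle" keep class 0; the remaining windows take
# the vehicle_type prediction (0=mrt, 1=bus, 2=car) shifted up by one.
def combine_hierarchical(veh_or_not_pred, veh_type_pred):
    veh_or_not_pred = np.asarray(veh_or_not_pred)
    veh_type_pred = np.asarray(veh_type_pred)
    overall = np.zeros_like(veh_or_not_pred)
    is_vehicle = veh_or_not_pred == 1
    overall[is_vehicle] = veh_type_pred[is_vehicle] + 1
    return overall

print(combine_hierarchical([0, 1, 1, 0], [2, 0, 1, 2]).tolist())  # [0, 1, 2, 0]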
Example #4
def lstm_train_test(train_df, test_df, opt, vehicle_or_not_train_opt,
                    vehicle_type_train_opt, train_test_opt, features):
    # Use an LSTM network to train the hierarchical model
    evaluation.init_write(
        opt,
        [vehicle_or_not_train_opt, vehicle_type_train_opt, train_test_opt],
        [features['VEHICLE_OR_NOT_FEATURES'], features['VEHICLE_TYPE']],
        train_df, test_df)
    print(evaluation.evaluation_report.write)
    # # ~~~~~~~~~~~~~~ train vehicle/Non-vehicle model ~~~~~~~~~~~~~~~~
    if vehicle_or_not_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_or_not_train_opt['random_seed'])
    vehicle_or_not_train_win_df = train_df.copy()
    vehicle_or_not_test_win_df = test_df.copy()

    vehicle_or_not_index = preprocessing.get_feature_idx(
        features['VEHICLE_OR_NOT_FEATURES'], features['ALL_FEATURES'])
    preprocessing.reassign_label(vehicle_or_not_train_win_df,
                                 vehicle_or_not_test_win_df,
                                 [[1, 0], [2, 1], [3, 1], [4, 1], [5, 0]])
    logging.info("Start to train vehicle_or_not model")
    vehicle_or_not_model = train_models.train_lstm_model(
        vehicle_or_not_train_win_df.iloc[:, vehicle_or_not_index],
        vehicle_or_not_train_win_df[opt['train_label_type']],
        vehicle_or_not_train_opt)
    logging.info("Start to test vehicle_or_not model")
    features_test = np.reshape(
        np.array(vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index]),
        (len(vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index]), 6,
         len(features['VEHICLE_OR_NOT_FEATURES'])))
    evaluation.evaluate_single_model(
        vehicle_or_not_model,
        opt['folder_name'],
        'vehicle_or_not',
        features_test,
        np.array(vehicle_or_not_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~ train vehicle_type_model ~~~~~~~~~~~~~~~~
    if vehicle_type_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_type_train_opt['random_seed'])
    vehicle_type_train_win_df = train_df.copy()
    vehicle_type_test_win_df = test_df.copy()

    vehicle_type_index = preprocessing.get_feature_idx(
        features['VEHICLE_TYPE'], features['ALL_FEATURES'])

    vehicle_type_test_win_df = vehicle_type_test_win_df[
        (vehicle_type_test_win_df[opt['test_label_type']] == 4)
        | (vehicle_type_test_win_df[opt['test_label_type']] == 3) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 2)]

    vehicle_type_train_win_df = vehicle_type_train_win_df[
        (vehicle_type_train_win_df[opt['train_label_type']] == 2)
        | (vehicle_type_train_win_df[opt['train_label_type']] == 3)
        | (vehicle_type_train_win_df[opt['train_label_type']] == 4)]

    preprocessing.reassign_label(vehicle_type_train_win_df,
                                 vehicle_type_test_win_df,
                                 [[2, 0], [3, 1], [4, 2]])
    logging.info("Start to train vehicle_type model")
    vehicle_type_model = train_models.train_lstm_model(
        vehicle_type_train_win_df.iloc[:, vehicle_type_index],
        vehicle_type_train_win_df[opt['train_label_type']],
        vehicle_type_train_opt)

    logging.info("Start to test vehicle_type model")
    features_test = np.reshape(
        np.array(vehicle_type_test_win_df.iloc[:, vehicle_type_index]),
        (len(vehicle_type_test_win_df.iloc[:, vehicle_type_index]), 6,
         len(features['VEHICLE_TYPE'])))
    evaluation.evaluate_single_model(
        vehicle_type_model,
        opt['folder_name'],
        'vehicle_type',
        features_test,
        np.array(vehicle_type_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~~ get overall result ~~~~~~~~~~~~~~~~~~~
    overall_test_win_df = test_df.copy()
    overall_result_label = evaluation.evaluate_overall_lstm(
        vehicle_or_not_model, vehicle_type_model, overall_test_win_df,
        overall_test_win_df[opt['test_label_type']], vehicle_or_not_index,
        vehicle_type_index, opt['smooth_overall_result'])
    # ~~~~~~~~~~~~~~~~~ Save predicted result into csv for visualization ~~~~~~~~~~
    evaluation.save_predicted_result_in_csv(overall_result_label,
                                            overall_test_win_df,
                                            opt['folder_name'], 'overall',
                                            opt['test_label_type'])
    evaluation.save_write(opt['folder_name'])

    del overall_test_win_df, vehicle_or_not_test_win_df, vehicle_or_not_train_win_df, vehicle_or_not_index, \
        vehicle_type_model, vehicle_or_not_model, vehicle_type_train_win_df, vehicle_type_test_win_df, \
        vehicle_type_index
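The np.reshape calls above turn each flattened window row into a (time steps, features) matrix for the LSTM, with 6 time steps per window. A small illustration with made-up dimensions is shown below; the exact flattening order of the stored columns depends on how preprocessing.get_win_df lays out the window, so treat this as a shape example only.

import numpy as np

n_windows, n_steps, n_features = 2, 6, 3
flat = np.arange(n_windows * n_steps * n_features, dtype=float)
flat = flat.reshape(n_windows, n_steps * n_features)        # as stored per window row
lstm_input = flat.reshape(n_windows, n_steps, n_features)   # as expected by the LSTM
print(lstm_input.shape)  # (2, 6, 3)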
Example #5
def dl_one_model_train_test(train_df, test_df, opt, one_model_train_opt,
                            train_test_opt, features):
    # Use DL to train the All-In-One model
    evaluation.init_write(opt, [one_model_train_opt, train_test_opt],
                          features['ONE_MODEL'], train_df, test_df)

    # # ~~~~~~~~~~~~~~ train one_model model ~~~~~~~~~~~~~~~~
    if one_model_train_opt['random_seed'] is not None:
        np.random.seed(one_model_train_opt['random_seed'])

    one_model_train_win_df = train_df.copy()
    one_model_test_win_df = test_df.copy()

    one_model_index = preprocessing.get_feature_idx(features['ONE_MODEL'],
                                                    features['ALL_FEATURES'])
    preprocessing.reassign_label(one_model_train_win_df, one_model_test_win_df,
                                 [[1, 0], [2, 1], [3, 2], [4, 3], [5, 0]])
    logging.info("Start to train one_model model")
    start_time = time.time()
    # features_test, labels_test = preprocessing.balance_dataset(
    #     one_model_test_win_df.iloc[:, one_model_index],
    #     one_model_test_win_df[opt['test_label_type']])

    one_model_model = train_models.train_dl_veh_type_model(
        one_model_train_win_df.iloc[:, one_model_index],
        one_model_train_win_df[opt['train_label_type']], one_model_train_opt,
        one_model_test_win_df.iloc[:, one_model_index],
        one_model_test_win_df[opt['test_label_type']])
    logging.info("Finished to train one_model model")
    print("it took", time.time() - start_time, "seconds.")
    logging.info("Start to test one_model model")
    if one_model_train_opt['DLNetwork'] == 'LSTM':
        features_test = np.reshape(
            np.array(one_model_test_win_df.iloc[:, one_model_index]),
            (len(one_model_test_win_df.iloc[:, one_model_index]), 6,
             len(features['ONE_MODEL'])))
        evaluation.evaluate_single_model(
            one_model_model,
            opt['folder_name'],
            'one_model',
            features_test,
            np.array(one_model_test_win_df[opt['test_label_type']]),
            save_model=False)
    else:
        valid_all_based = preprocessing.remove_mix(one_model_test_win_df,
                                                   'all_based_win_label')
        evaluation.evaluate_single_model(
            one_model_model,
            opt['folder_name'],
            'one_model',
            np.array(valid_all_based.iloc[:, one_model_index]),
            np.array(valid_all_based['all_based_win_label']),
            save_model=False)
        evaluation.evaluate_single_model(
            one_model_model,
            opt['folder_name'],
            'one_model',
            np.array(one_model_test_win_df.iloc[:, one_model_index]),
            np.array(one_model_test_win_df['last_based_win_label']),
            save_model=False)

    evaluation.save_write(opt['folder_name'])
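The remapping [[1, 0], [2, 1], [3, 2], [4, 3], [5, 0]] together with the evaluation class names ['not vehicle', 'mrt', 'bus', 'car'] used in these examples implies the raw-to-model label scheme sketched below. This is an inference from the code shown here, not an authoritative definition of the raw labels.

# Inferred label scheme: raw labels 1 and 5 collapse to "not vehicle",
# 2 -> mrt, 3 -> bus, 4 -> car.
RAW_TO_ONE_MODEL = {1: 0, 5: 0, 2: 1, 3: 2, 4: 3}
ONE_MODEL_CLASS_NAMES = {0: 'not vehicle', 1: 'mrt', 2: 'bus', 3: 'car'}

def raw_label_to_name(raw_label):
    return ONE_MODEL_CLASS_NAMES[RAW_TO_ONE_MODEL[raw_label]]

print(raw_label_to_name(3))  # bus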
Example #6
def dl_train_test_3binary(train_df, test_df, opt, vehicle_or_not_train_opt,
                          vehicle_type_train_opt, bus_or_not_train_opt,
                          mrt_or_car_train_opt, train_test_opt, features):
    # Use DL to train the bi-bi-binary model
    evaluation.init_write(opt, [
        vehicle_or_not_train_opt, vehicle_type_train_opt, bus_or_not_train_opt,
        mrt_or_car_train_opt, train_test_opt
    ], [
        features['VEHICLE_OR_NOT_FEATURES'], features['BUS_OR_NOT'],
        features['MRT_OR_CAR']
    ], train_df, test_df)

    # # ~~~~~~~~~~~~~~ train vehicle/Non-vehicle model ~~~~~~~~~~~~~~~~
    if vehicle_or_not_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_or_not_train_opt['random_seed'])
    vehicle_or_not_train_win_df = train_df.copy()
    vehicle_or_not_test_win_df = test_df.copy()

    vehicle_or_not_index = preprocessing.get_feature_idx(
        features['VEHICLE_OR_NOT_FEATURES'], features['ALL_FEATURES'])
    preprocessing.reassign_label(vehicle_or_not_train_win_df,
                                 vehicle_or_not_test_win_df,
                                 [[1, 0], [2, 1], [3, 1], [4, 1], [5, 0]])
    logging.info("Start to train vehicle_or_not model")
    # features_test, labels_test = preprocessing.balance_dataset(
    #     vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index],
    #     vehicle_or_not_test_win_df[opt['test_label_type']])
    vehicle_or_not_model = train_models.train_dl_veh_or_not_model(
        vehicle_or_not_train_win_df.iloc[:, vehicle_or_not_index],
        vehicle_or_not_train_win_df[opt['train_label_type']],
        vehicle_or_not_train_opt,
        vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index],
        vehicle_or_not_test_win_df[opt['test_label_type']])
    logging.info("Start to test vehicle_or_not model")
    evaluation.evaluate_single_model(
        vehicle_or_not_model,
        opt['folder_name'],
        'vehicle_or_not',
        np.array(vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index]),
        np.array(vehicle_or_not_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~ train bus_or_not_model ~~~~~~~~~~~~~~~~~~
    if bus_or_not_train_opt['random_seed'] is not None:
        np.random.seed(bus_or_not_train_opt['random_seed'])

    bus_or_not_train_win_df = train_df.copy()
    bus_or_not_test_win_df = test_df.copy()

    bus_or_not_index = preprocessing.get_feature_idx(features['BUS_OR_NOT'],
                                                     features['ALL_FEATURES'])

    bus_or_not_test_win_df = bus_or_not_test_win_df[
        (bus_or_not_test_win_df[opt['test_label_type']] == 4) |
        (bus_or_not_test_win_df[opt['test_label_type']] == 3) |
        (bus_or_not_test_win_df[opt['test_label_type']] == 2)]

    bus_or_not_train_win_df = bus_or_not_train_win_df[
        (bus_or_not_train_win_df[opt['train_label_type']] == 2)
        | (bus_or_not_train_win_df[opt['train_label_type']] == 3)
        | (bus_or_not_train_win_df[opt['train_label_type']] == 4)]

    preprocessing.reassign_label(bus_or_not_train_win_df,
                                 bus_or_not_test_win_df,
                                 [[2, 0], [3, 1], [4, 0]])
    logging.info("Start to train bus_or_not model")
    # features_test, labels_test = preprocessing.balance_dataset(
    #     bus_or_not_test_win_df.iloc[:, bus_or_not_index],
    #     bus_or_not_test_win_df[opt['test_label_type']])
    bus_or_not_model = train_models.train_dl_veh_type_model(
        bus_or_not_train_win_df.iloc[:, bus_or_not_index],
        bus_or_not_train_win_df[opt['train_label_type']], bus_or_not_train_opt,
        bus_or_not_test_win_df.iloc[:, bus_or_not_index],
        bus_or_not_test_win_df[opt['test_label_type']])

    logging.info("Start to test bus_or_not model")

    evaluation.evaluate_single_model(
        bus_or_not_model,
        opt['folder_name'],
        'bus_or_not',
        np.array(bus_or_not_test_win_df.iloc[:, bus_or_not_index]),
        np.array(bus_or_not_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~ train mrt_or_car_model ~~~~~~~~~~~~~~~~
    if mrt_or_car_train_opt['random_seed'] is not None:
        np.random.seed(mrt_or_car_train_opt['random_seed'])

    mrt_or_car_train_win_df = train_df.copy()
    mrt_or_car_test_win_df = test_df.copy()

    mrt_or_car_index = preprocessing.get_feature_idx(features['VEHICLE_TYPE'],
                                                     features['ALL_FEATURES'])

    mrt_or_car_test_win_df = mrt_or_car_test_win_df[
        (mrt_or_car_test_win_df[opt['test_label_type']] == 4) |
        (mrt_or_car_test_win_df[opt['test_label_type']] == 2)]

    mrt_or_car_train_win_df = mrt_or_car_train_win_df[
        (mrt_or_car_train_win_df[opt['train_label_type']] == 2) |
        (mrt_or_car_train_win_df[opt['train_label_type']] == 4)]

    preprocessing.reassign_label(mrt_or_car_train_win_df,
                                 mrt_or_car_test_win_df, [[2, 0], [4, 1]])
    logging.info("Start to train mrt_or_car model")
    # features_test, labels_test = preprocessing.balance_dataset(
    #     mrt_or_car_test_win_df.iloc[:, mrt_or_car_index],
    #     mrt_or_car_test_win_df[opt['test_label_type']])
    mrt_or_car_model = train_models.train_dl_veh_type_model(
        mrt_or_car_train_win_df.iloc[:, mrt_or_car_index],
        mrt_or_car_train_win_df[opt['train_label_type']], mrt_or_car_train_opt,
        mrt_or_car_test_win_df.iloc[:, mrt_or_car_index],
        mrt_or_car_test_win_df[opt['test_label_type']])

    logging.info("Start to test mrt_or_car model")
    evaluation.evaluate_single_model(
        mrt_or_car_model,
        opt['folder_name'],
        'mrt_or_car',
        np.array(mrt_or_car_test_win_df.iloc[:, mrt_or_car_index]),
        np.array(mrt_or_car_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~~ get overall result ~~~~~~~~~~~~~~~~~~~
    overall_test_win_df = test_df.copy()
    overall_result_label = evaluation.evaluate_overall_bibibinary(
        vehicle_or_not_model, bus_or_not_model, mrt_or_car_model,
        overall_test_win_df, overall_test_win_df[opt['test_label_type']],
        vehicle_or_not_index, bus_or_not_index, mrt_or_car_index,
        opt['smooth_overall_result'])
    # # ~~~~~~~~~~~~~~~~~ Save predicted result into csv for visualization ~~~~~~~~~~
    evaluation.save_predicted_result_in_csv(overall_result_label,
                                            overall_test_win_df,
                                            opt['folder_name'], 'overall',
                                            opt['test_label_type'])
    evaluation.save_write(opt['folder_name'])
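In the bi-bi-binary setup, three binary decisions are cascaded: vehicle_or_not first, then bus_or_not, then mrt_or_car for the remaining vehicle windows. The sketch below shows one plausible way to merge those decisions into a single overall label (again using 0 = not vehicle, 1 = mrt, 2 = bus, 3 = car); it illustrates the assumed semantics only and is not the actual evaluation.evaluate_overall_bibibinary.

import numpy as np

# Per the remappings above: bus_or_not predicts 1 for bus, and mrt_or_car
# predicts 0 for mrt and 1 for car.
def combine_bibibinary(veh_or_not, bus_or_not, mrt_or_car):
    veh_or_not = np.asarray(veh_or_not)
    bus_or_not = np.asarray(bus_or_not)
    mrt_or_car = np.asarray(mrt_or_car)
    overall = np.zeros_like(veh_or_not)                                      # 0 = not vehicle
    overall[(veh_or_not == 1) & (bus_or_not == 1)] = 2                       # bus
    overall[(veh_or_not == 1) & (bus_or_not == 0) & (mrt_or_car == 0)] = 1   # mrt
    overall[(veh_or_not == 1) & (bus_or_not == 0) & (mrt_or_car == 1)] = 3   # car
    return overall

print(combine_bibibinary([1, 1, 0, 1], [1, 0, 0, 0], [0, 0, 1, 1]).tolist())  # [2, 1, 0, 3]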
Example #7
def ml_hie_train_test(ml_opt, train_df, test_df, opt, features):
    # Use ML to train hierarchical model
    evaluation.init_write(
        opt, None,
        [features['VEHICLE_OR_NOT_FEATURES'], features['VEHICLE_TYPE']],
        train_df, test_df)

    # ~~~~~~~~~~~~~~ train vehicle/Non-vehicle model ~~~~~~~~~~~~~~~~
    vehicle_or_not_train_win_df = train_df.copy()
    vehicle_or_not_test_win_df = test_df.copy()

    vehicle_or_not_index = preprocessing.get_feature_idx(
        features['VEHICLE_OR_NOT_FEATURES'], features['ALL_FEATURES'])
    preprocessing.reassign_label(vehicle_or_not_train_win_df,
                                 vehicle_or_not_test_win_df,
                                 [[1, 0], [2, 1], [3, 1], [4, 1], [5, 0]])
    logging.info("Start to train vehicle_or_not model")
    vehicle_or_not_model = train_models.train_ml_model(
        np.array(vehicle_or_not_train_win_df.iloc[:, vehicle_or_not_index]),
        np.array(vehicle_or_not_train_win_df[opt['train_label_type']]), ml_opt)

    logging.info("Start to evaluate vehicle_or_not model")
    evaluation.evaluate_single_ml_model(
        vehicle_or_not_model,
        np.array(vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index]),
        np.array(vehicle_or_not_test_win_df[opt['test_label_type']]),
        ['not vehicle', 'vehicle'], opt['folder_name'])

    # ~~~~~~~~~~~~~~~~ train vehicle_type_model ~~~~~~~~~~~~~~~~
    vehicle_type_train_win_df = train_df.copy()
    vehicle_type_test_win_df = test_df.copy()

    vehicle_type_index = preprocessing.get_feature_idx(
        features['VEHICLE_TYPE'], features['ALL_FEATURES'])
    vehicle_type_test_win_df = vehicle_type_test_win_df[
        (vehicle_type_test_win_df[opt['test_label_type']] == 4)
        | (vehicle_type_test_win_df[opt['test_label_type']] == 3) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 2)]

    vehicle_type_train_win_df = vehicle_type_train_win_df[
        (vehicle_type_train_win_df[opt['train_label_type']] == 2)
        | (vehicle_type_train_win_df[opt['train_label_type']] == 3)
        | (vehicle_type_train_win_df[opt['train_label_type']] == 4)]

    preprocessing.reassign_label(vehicle_type_train_win_df,
                                 vehicle_type_test_win_df,
                                 [[2, 0], [3, 1], [4, 2]])
    logging.info("Start to train vehicle_type model")
    vehicle_type_model = train_models.train_ml_model(
        np.array(vehicle_type_train_win_df.iloc[:, vehicle_type_index]),
        np.array(vehicle_type_train_win_df[opt['train_label_type']]), ml_opt)
    logging.info("Start to evaluate vehicle_type model")
    evaluation.evaluate_single_ml_model(
        vehicle_type_model,
        np.array(vehicle_type_test_win_df.iloc[:, vehicle_type_index]),
        np.array(vehicle_type_test_win_df[opt['test_label_type']]),
        ['mrt', 'bus', 'car'], opt['folder_name'])

    # ~~~~~~~~~~~~~~~~~ get overall result ~~~~~~~~~~~~~~~~~~~
    overall_test_win_df = test_df.copy()
    valid_all_based = preprocessing.remove_mix(overall_test_win_df,
                                               'all_based_win_label')
    evaluation.evaluate_overall_manual_2(
        vehicle_or_not_model, vehicle_type_model, valid_all_based,
        valid_all_based['all_based_win_label'], vehicle_or_not_index,
        vehicle_type_index, opt['smooth_overall_result'])
    evaluation.evaluate_overall_manual_2(
        vehicle_or_not_model, vehicle_type_model, overall_test_win_df,
        overall_test_win_df['last_based_win_label'], vehicle_or_not_index,
        vehicle_type_index, opt['smooth_overall_result'])

    # ~~~~~~~~~~~~~~~~~ Save predicted result into csv for visualization ~~~~~~~~~~
    # evaluation.save_predicted_result_in_csv(overall_result_label, overall_test_win_df, opt['folder_name'],
    #                                         'overall', opt['test_label_type'])
    evaluation.save_write(opt['folder_name'])
    # print(evaluation.evaluation_report.write)
    del vehicle_or_not_test_win_df, vehicle_or_not_train_win_df, vehicle_type_train_win_df, vehicle_type_test_win_df, \
        vehicle_type_model, overall_test_win_df, vehicle_or_not_model, vehicle_type_index, vehicle_or_not_index
def predict_from_raw_df(features, opt, test_date, test_nid):
    save_to_db = False
    predict_by_model = True
    folder = './data/data_frame_from_IHPC/processed_df/'
    file_name = test_nid + '_' + test_date + '.csv'
    os.makedirs(folder, exist_ok=True)
    all_files = os.listdir(folder)

    if file_name in all_files:
        # DataFrame.from_csv has been removed from pandas; read_csv with index_col=0 is the replacement.
        data_frame_full = pd.read_csv(folder + file_name, index_col=0)
    else:
        raw_df, labelled_df_list = get_raw_data_from_csv(test_nid, test_date)

        logging.info("Assigning Labels")
        data_frame_full = assign_app_label(raw_df, labelled_df_list)
        if data_frame_full is None:
            logging.warning(
                "No Dataframes with assigned labels, nid: %s date: %s" %
                (test_nid, test_date))
            return

        logging.info("Feature Calculation and Normalization")
        normalized_df = all_feature_calculation_normalization(
            data_frame_full, features['ALL_FEATURES'], test_date)
        logging.info("Converting to window dataframe")
        data_frame_full = preprocessing.get_win_df(
            [Trip(normalized_df, 1, test_nid)], features['ALL_FEATURES'])
        del normalized_df

        if not os.path.exists(folder):
            os.makedirs(folder)
        data_frame_full.to_csv(folder + file_name)

    if predict_by_model is True:
        data_frame_full = preprocessing.remove_mix(data_frame_full,
                                                   opt['test_label_type'])
        preprocessing.reassign_label(data_frame_full, data_frame_full,
                                     [[0, 5], [1, 5]])
        logging.info("Loading models")
        vehicle_type_model = load_model(
            './evaluation_report_testing_only/google_train_app_test/'
            'google_train_app_test/17-05-15 16:13/', "vehicle_type_model")
        vehicle_or_not_model = \
            load_model('./evaluation_report_testing_only/google_train_app_test/'
                       'google_train_app_test/17-05-15 16:13/', "vehicle_or_not_model")

        logging.info("Getting the result of vehicle_or_nor model")
        vehicle_or_not_test_df = data_frame_full.copy()

        vehicle_or_not_index = preprocessing.get_feature_idx(
            features['VEHICLE_OR_NOT_FEATURES'], features['ALL_FEATURES'])
        preprocessing.reassign_label(vehicle_or_not_test_df,
                                     vehicle_or_not_test_df,
                                     [[1, 0], [2, 1], [3, 1], [4, 1], [5, 0]])

        logging.info("Start to evaluate vehicle_or_not model")
        evaluation.evaluate_single_model(
            vehicle_or_not_model,
            opt['folder_name'],
            'model_name',
            np.array(vehicle_or_not_test_df.iloc[:, vehicle_or_not_index]),
            np.array(vehicle_or_not_test_df[opt['test_label_type']]),
            save_model=False,
            num_classes=2)

        logging.info("Getting the result of vehicle_type model")
        vehicle_type_test_df = data_frame_full.copy()
        vehicle_type_index = preprocessing.get_feature_idx(
            features['VEHICLE_TYPE'], features['ALL_FEATURES'])
        vehicle_type_test_df = vehicle_type_test_df[
            (vehicle_type_test_df[opt['test_label_type']] == 4) |
            (vehicle_type_test_df[opt['test_label_type']] == 3) |
            (vehicle_type_test_df[opt['test_label_type']] == 2)]
        preprocessing.reassign_label(vehicle_type_test_df,
                                     vehicle_type_test_df,
                                     [[2, 0], [3, 1], [4, 2]])

        logging.info("Start to evaluate vehicle_type model")
        evaluation.evaluate_single_model(
            vehicle_type_model,
            opt['folder_name'],
            'model_name',
            np.array(vehicle_type_test_df.iloc[:, vehicle_type_index]),
            np.array(vehicle_type_test_df[opt['test_label_type']]),
            save_model=False,
            num_classes=3)

        # ~~~~~~~~~~~~~~~~~ get overall result ~~~~~~~~~~~~~~~~~~~
        overall_test_win_df = data_frame_full.copy()
        overall_result_label = evaluation.evaluate_overall_manual_2(
            vehicle_or_not_model, vehicle_type_model, overall_test_win_df,
            overall_test_win_df[opt['test_label_type']], vehicle_or_not_index,
            vehicle_type_index, opt['smooth_overall_result'])
        # ~~~~~~~~~~~~~~~~~ Save predicted result into csv for visualization ~~~~~~~~~~
        evaluation.save_predicted_result_in_csv(overall_result_label,
                                                overall_test_win_df,
                                                opt['folder_name'], 'overall',
                                                opt['test_label_type'])
        evaluation.save_write(opt['folder_name'])
    if save_to_db:
        poi_latlon_heu = tripParse.detectPOI_geov(data_frame_full,
                                                  params.stopped_thresh,
                                                  params.poi_min_dwell_time,
                                                  params.loc_round_decimals)

        # Combine the detected POIs using DBSCAN
        pois_latlon_raw = np.array(poi_latlon_heu)
        logging.info("raw POIs: " + str(pois_latlon_raw))
        pois_latlon_comb = []
        if len(pois_latlon_raw) > 0:
            core_samples_mask, labels = apply_DBSCAN(pois_latlon_raw,
                                                     params.poi_comb_range,
                                                     params.poi_comb_samples)
            unique_labels = np.unique(labels)
            logging.info("labels when combing: " + str(labels))
            for unique_label in unique_labels:
                if not unique_label == -1:
                    cur_lat_mean, cur_lon_mean = \
                        np.mean(pois_latlon_raw[(labels == unique_label) & core_samples_mask, :], 0)
                    pois_latlon_comb.append([
                        float(round(cur_lat_mean, params.loc_round_decimals)),
                        float(round(cur_lon_mean, params.loc_round_decimals))
                    ])
            for idx, label in enumerate(labels):
                if label == -1:
                    pois_latlon_comb.append([
                        float(pois_latlon_raw[idx, 0]),
                        float(pois_latlon_raw[idx, 1])
                    ])
        logging.info("combined POIs: " + str(pois_latlon_comb))

        # identify home & school from the POIs
        home_loc, school_loc, pois_label_temp = \
            tripParse.identify_home_school(pois_latlon_comb, data_frame_full, school_start=params.school_start,
                                           school_end=params.school_end, home_start=params.home_start,
                                           home_end=params.home_end, min_school_thresh=params.min_school_thresh,
                                           poi_cover_radius=params.poi_cover_radius)
        logging.info("Temporary labels of POIs: " + str(pois_label_temp))

        # label all points based on the home/school & POI location
        pois_dict = \
            tripParse.label_pts_by_pois(pois_latlon_comb, pois_label_temp, data_frame_full,
                                        home_cover_radius=params.home_cover_radius,
                                        sch_cover_radius=params.sch_cover_radius,
                                        poi_cover_radius=params.poi_cover_radius,
                                        poi_min_dwell_time=params.poi_min_dwell_time)
        logging.info("Chronological POIs: " +
                     str(pois_dict['pois_latlon_chro']))
        logging.info("Chronological POI labels: " +
                     str(pois_dict['pois_label_chro']))

        # take out trips and add triplabel to data frame
        trips_dict = {
            'trip_num': [],
            'start_poi_loc': [],
            'end_poi_loc': [],
            'tot_dist(km)': [],
            'tot_dura(s)': [],
            'start_sgt': [],
            'end_sgt': [],
            'tot_num_trips': 0,
            'nid': test_nid,
            'analyzed_date': test_date,
            'home_loc': home_loc,
            'school_loc': school_loc,
            'valid_loc_perc': [],
            'num_pt': []
        }

        trip_labels = np.array([None] * len(data_frame_full))
        # chunks of the poi label, -1 chunks are trips
        poi_label_chunks = tripParse.chunks_real(
            data_frame_full['POI_LABEL'].values.tolist(), include_values=True)
        logging.info("Chronological chunks of poi labels: " +
                     str(poi_label_chunks))
        trip_num = 0
        start_poi_num = 0
        end_poi_num = 1
        for idx, label_chunk in enumerate(poi_label_chunks):
            # go through each trip chunk and get information of trips
            if label_chunk[2] == -1:
                # if it's a trip chunk
                trip_num += 1
                trips_dict['trip_num'].append(trip_num)
                trip_labels[label_chunk[0]:label_chunk[1]] = trip_num
                if idx == 0:
                    # if there's no start poi
                    trips_dict['start_poi_loc'].append([])
                    end_poi_num -= 1
                else:
                    trips_dict['start_poi_loc'].append(
                        pois_dict['pois_latlon_chro'][start_poi_num])
                    start_poi_num += 1
                if idx == len(poi_label_chunks) - 1:
                    # if there's no end poi
                    trips_dict['end_poi_loc'].append([])
                else:
                    trips_dict['end_poi_loc'].append(
                        pois_dict['pois_latlon_chro'][end_poi_num])
                    end_poi_num += 1
                trips_dict['tot_dist(km)'].append(
                    round(
                        np.nansum(data_frame_full['DISTANCE_DELTA']
                                  [label_chunk[0]:label_chunk[1]].values) /
                        1000, params.dist_round_decimals))
                trips_dict['tot_dura(s)'].\
                    append(int(np.nansum(data_frame_full['TIME_DELTA'][label_chunk[0]:label_chunk[1]].values)))
                trips_dict['start_sgt'].append(
                    round(data_frame_full['TIME_SGT'][label_chunk[0]], 3))
                trips_dict['end_sgt'].append(
                    round(data_frame_full['TIME_SGT'][label_chunk[1] - 1], 3))
                cur_lat = data_frame_full['WLATITUDE'][
                    label_chunk[0]:label_chunk[1]].values
                trips_dict['valid_loc_perc'].append(
                    len(cur_lat[~np.isnan(cur_lat)]) * 1.0 / len(cur_lat))
                trips_dict['num_pt'].append(label_chunk[1] - label_chunk[0])
            trips_dict['tot_num_trips'] = trip_num

        data_frame_full['TRIP_LABEL'] = pd.Series(
            trip_labels)  # trip_labels are 1, 2, 3, 4, ...

        # con_com = """dbname='nse_mode_id' user='******' password='******' host='localhost'"""
        con_com = """dbname='""" + params.dbname_str + """' user='******' password='******' host='""" + params.dbhost + """' port ='""" + params.dbport + """' """
        conn_psql = psycopg2.connect(con_com)
        cursor_psql = conn_psql.cursor()
        """ save extra labels and features into the PSQL DB table """
        logging.warning(
            "Starting to save extra labels and features into the PSQL DB table"
        )
        save_extra = save_extra_PSQL_2016(conn_psql, cursor_psql,
                                          params.tableExtra2016,
                                          data_frame_full)
        logging.warning("Extra columns saving status: " + str(save_extra))
        """ save trip dictionary into the PSQL DB table """
        if trips_dict['tot_num_trips'] > 0:
            logging.warning(
                "Start to save trip dictionary into the PSQL DB table")
            save_trips = save_tripsummary_PSQL_2016(conn_psql, cursor_psql,
                                                    params.tableTrip2016,
                                                    params.tableExtra2016,
                                                    trips_dict)
            logging.warning("Trip summary saving status: " + str(save_trips))