def dl_veh_type(train_df, test_df, opt, vehicle_or_not_train_opt,
                vehicle_type_train_opt, train_test_opt, features):
    """Train the vehicle-type model separately."""
    evaluation.init_write(
        opt, [vehicle_or_not_train_opt, vehicle_type_train_opt, train_test_opt],
        [features['VEHICLE_OR_NOT_FEATURES'], features['VEHICLE_TYPE']],
        train_df, test_df)
    print(evaluation.evaluation_report.write)

    if vehicle_type_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_type_train_opt['random_seed'])
    vehicle_type_train_win_df = train_df.copy()
    vehicle_type_test_win_df = test_df.copy()
    vehicle_type_index = preprocessing.get_feature_idx(
        features['VEHICLE_TYPE'], features['ALL_FEATURES'])
    # Keep only the vehicle windows (raw labels 2 = MRT, 3 = bus, 4 = car).
    vehicle_type_test_win_df = vehicle_type_test_win_df[
        (vehicle_type_test_win_df[opt['test_label_type']] == 4) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 3) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 2)]
    vehicle_type_train_win_df = vehicle_type_train_win_df[
        (vehicle_type_train_win_df[opt['train_label_type']] == 2) |
        (vehicle_type_train_win_df[opt['train_label_type']] == 3) |
        (vehicle_type_train_win_df[opt['train_label_type']] == 4)]
    preprocessing.reassign_label(vehicle_type_train_win_df,
                                 vehicle_type_test_win_df,
                                 [[2, 0], [3, 1], [4, 2]])
    # features_test, labels_test = preprocessing.balance_dataset(
    #     vehicle_type_test_win_df.iloc[:, vehicle_type_index],
    #     vehicle_type_test_win_df[opt['test_label_type']])
    logging.info("Start to train vehicle_type model")
    train_models.train_dl_veh_type_model(
        vehicle_type_train_win_df.iloc[:, vehicle_type_index],
        vehicle_type_train_win_df[opt['train_label_type']],
        vehicle_type_train_opt,
        vehicle_type_test_win_df.iloc[:, vehicle_type_index],
        vehicle_type_test_win_df[opt['test_label_type']])
def ml_one_train_test(ml_opt, train_df, test_df, opt, features):
    """Use ML to train the All-In-One model."""
    evaluation.init_write(
        opt, None,
        [features['VEHICLE_OR_NOT_FEATURES'], features['VEHICLE_TYPE']],
        train_df, test_df)

    # ~~~~~~~~~~~~~~ train All-In-One model ~~~~~~~~~~~~~~~~
    one_model_train_win_df = train_df.copy()
    one_model_test_win_df = test_df.copy()
    one_model_index = preprocessing.get_feature_idx(features['ONE_MODEL'],
                                                    features['ALL_FEATURES'])
    # Raw labels: 1/5 -> not vehicle (0), 2 -> mrt (1), 3 -> bus (2), 4 -> car (3).
    preprocessing.reassign_label(one_model_train_win_df, one_model_test_win_df,
                                 [[1, 0], [2, 1], [3, 2], [4, 3], [5, 0]])
    logging.info("Start to train ml one model")
    start_time = time.time()
    one_model = train_models.train_ml_model(
        np.array(one_model_train_win_df.iloc[:, one_model_index]),
        np.array(one_model_train_win_df[opt['train_label_type']]), ml_opt)
    logging.info("Finished training ml one_model")
    print("it took", time.time() - start_time, "seconds.")

    logging.info("Start to evaluate ml one model")
    valid_all_based = preprocessing.remove_mix(one_model_test_win_df,
                                               'all_based_win_label')
    evaluation.evaluate_single_ml_model(
        one_model, np.array(valid_all_based.iloc[:, one_model_index]),
        np.array(valid_all_based['all_based_win_label']),
        ['not vehicle', 'mrt', 'bus', 'car'], opt['folder_name'])
    evaluation.evaluate_single_ml_model(
        one_model, np.array(one_model_test_win_df.iloc[:, one_model_index]),
        np.array(one_model_test_win_df['last_based_win_label']),
        ['not vehicle', 'mrt', 'bus', 'car'], opt['folder_name'])
    evaluation.save_write(opt['folder_name'])
    # print(evaluation.evaluation_report.write)
    del one_model_test_win_df, one_model_train_win_df, one_model_index, one_model
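# Illustrative sketch (not used by the pipeline): every train/test function in this
# module relies on preprocessing.reassign_label(train_df, test_df, pairs) to remap
# the raw window labels in place using [old, new] pairs (raw labels: 1 and 5 ->
# non-vehicle, 2 -> MRT, 3 -> bus, 4 -> car). The helper below only shows the
# assumed remapping behaviour on a single DataFrame column; the real implementation
# in `preprocessing` may differ in details.
def _reassign_label_sketch(df, label_col, pairs):
    """Remap `label_col` in place: each [old, new] pair replaces old with new."""
    mapping = {old: new for old, new in pairs}
    df[label_col] = df[label_col].map(lambda lbl: mapping.get(lbl, lbl))
    return df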
def dl_train_test(train_df, test_df, opt, vehicle_or_not_train_opt,
                  vehicle_type_train_opt, train_test_opt, features):
    """Use deep learning to train the hierarchical model."""
    evaluation.init_write(
        opt, [vehicle_or_not_train_opt, vehicle_type_train_opt, train_test_opt],
        [features['VEHICLE_OR_NOT_FEATURES'], features['VEHICLE_TYPE']],
        train_df, test_df)
    print(evaluation.evaluation_report.write)

    # ~~~~~~~~~~~~~~ train vehicle/Non-vehicle model ~~~~~~~~~~~~~~~~
    if vehicle_or_not_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_or_not_train_opt['random_seed'])
    vehicle_or_not_train_win_df = train_df.copy()
    vehicle_or_not_test_win_df = test_df.copy()
    vehicle_or_not_index = preprocessing.get_feature_idx(
        features['VEHICLE_OR_NOT_FEATURES'], features['ALL_FEATURES'])
    # Raw labels 1/5 -> not vehicle (0); 2/3/4 -> vehicle (1).
    preprocessing.reassign_label(vehicle_or_not_train_win_df,
                                 vehicle_or_not_test_win_df,
                                 [[1, 0], [2, 1], [3, 1], [4, 1], [5, 0]])
    logging.info("Start to train vehicle_or_not model")
    # features_test, labels_test = preprocessing.balance_dataset(
    #     vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index],
    #     vehicle_or_not_test_win_df[opt['test_label_type']])
    vehicle_or_not_model = train_models.train_dl_veh_or_not_model(
        vehicle_or_not_train_win_df.iloc[:, vehicle_or_not_index],
        vehicle_or_not_train_win_df[opt['train_label_type']],
        vehicle_or_not_train_opt,
        vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index],
        vehicle_or_not_test_win_df[opt['test_label_type']])
    logging.info("Start to test vehicle_or_not model")
    evaluation.evaluate_single_model(
        vehicle_or_not_model, opt['folder_name'], 'vehicle_or_not',
        np.array(vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index]),
        np.array(vehicle_or_not_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~ train vehicle_type_model ~~~~~~~~~~~~~~~~
    if vehicle_type_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_type_train_opt['random_seed'])
    vehicle_type_train_win_df = train_df.copy()
    vehicle_type_test_win_df = test_df.copy()
    vehicle_type_index = preprocessing.get_feature_idx(
        features['VEHICLE_TYPE'], features['ALL_FEATURES'])
    # Keep only the vehicle windows (raw labels 2 = MRT, 3 = bus, 4 = car).
    vehicle_type_test_win_df = vehicle_type_test_win_df[
        (vehicle_type_test_win_df[opt['test_label_type']] == 4) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 3) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 2)]
    vehicle_type_train_win_df = vehicle_type_train_win_df[
        (vehicle_type_train_win_df[opt['train_label_type']] == 2) |
        (vehicle_type_train_win_df[opt['train_label_type']] == 3) |
        (vehicle_type_train_win_df[opt['train_label_type']] == 4)]
    preprocessing.reassign_label(vehicle_type_train_win_df,
                                 vehicle_type_test_win_df,
                                 [[2, 0], [3, 1], [4, 2]])
    # features_test, labels_test = preprocessing.balance_dataset(
    #     vehicle_type_test_win_df.iloc[:, vehicle_type_index],
    #     vehicle_type_test_win_df[opt['test_label_type']])
    logging.info("Start to train vehicle_type model")
    vehicle_type_model = train_models.train_dl_veh_type_model(
        vehicle_type_train_win_df.iloc[:, vehicle_type_index],
        vehicle_type_train_win_df[opt['train_label_type']],
        vehicle_type_train_opt,
        vehicle_type_test_win_df.iloc[:, vehicle_type_index],
        vehicle_type_test_win_df[opt['test_label_type']])
    logging.info("Start to test vehicle_type model")
    if vehicle_type_train_opt['middle_output'] is True:
        evaluation.evaluate_single_ml_model(
            vehicle_type_model,
            np.array(vehicle_type_test_win_df.iloc[:, vehicle_type_index]),
            np.array(vehicle_type_test_win_df[opt['test_label_type']]),
            ['mrt', 'bus', 'car'], opt['folder_name'])
    else:
        evaluation.evaluate_single_model(
            vehicle_type_model, opt['folder_name'], 'vehicle_type',
            np.array(vehicle_type_test_win_df.iloc[:, vehicle_type_index]),
            np.array(vehicle_type_test_win_df[opt['test_label_type']]),
            save_model=False)
    # print(evaluation.evaluation_report.write)

    # ~~~~~~~~~~~~~~~~~ get overall result ~~~~~~~~~~~~~~~~~~~
    overall_test_win_df = test_df.copy()
    valid_all_based = preprocessing.remove_mix(overall_test_win_df,
                                               'all_based_win_label')
    overall_result_label = evaluation.evaluate_overall_manual_2(
        vehicle_or_not_model, vehicle_type_model, valid_all_based,
        valid_all_based['all_based_win_label'], vehicle_or_not_index,
        vehicle_type_index, opt['smooth_overall_result'])
    # The labels saved to CSV below come from this second, last_based evaluation.
    overall_result_label = evaluation.evaluate_overall_manual_2(
        vehicle_or_not_model, vehicle_type_model, overall_test_win_df,
        overall_test_win_df['last_based_win_label'], vehicle_or_not_index,
        vehicle_type_index, opt['smooth_overall_result'])

    # ~~~~~~~~~~~~~~~~~ Save predicted result into csv for visualization ~~~~~~~~~~
    evaluation.save_predicted_result_in_csv(overall_result_label,
                                            overall_test_win_df,
                                            opt['folder_name'], 'overall',
                                            opt['test_label_type'])
    evaluation.save_write(opt['folder_name'])
    del overall_test_win_df, vehicle_or_not_test_win_df, vehicle_or_not_train_win_df, vehicle_or_not_index, \
        vehicle_type_model, vehicle_type_train_win_df, vehicle_type_test_win_df, \
        vehicle_type_index
def lstm_train_test(train_df, test_df, opt, vehicle_or_not_train_opt,
                    vehicle_type_train_opt, train_test_opt, features):
    """Use an LSTM network to train the hierarchical model."""
    evaluation.init_write(
        opt, [vehicle_or_not_train_opt, vehicle_type_train_opt, train_test_opt],
        [features['VEHICLE_OR_NOT_FEATURES'], features['VEHICLE_TYPE']],
        train_df, test_df)
    print(evaluation.evaluation_report.write)

    # ~~~~~~~~~~~~~~ train vehicle/Non-vehicle model ~~~~~~~~~~~~~~~~
    if vehicle_or_not_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_or_not_train_opt['random_seed'])
    vehicle_or_not_train_win_df = train_df.copy()
    vehicle_or_not_test_win_df = test_df.copy()
    vehicle_or_not_index = preprocessing.get_feature_idx(
        features['VEHICLE_OR_NOT_FEATURES'], features['ALL_FEATURES'])
    preprocessing.reassign_label(vehicle_or_not_train_win_df,
                                 vehicle_or_not_test_win_df,
                                 [[1, 0], [2, 1], [3, 1], [4, 1], [5, 0]])
    logging.info("Start to train vehicle_or_not model")
    vehicle_or_not_model = train_models.train_lstm_model(
        vehicle_or_not_train_win_df.iloc[:, vehicle_or_not_index],
        vehicle_or_not_train_win_df[opt['train_label_type']],
        vehicle_or_not_train_opt)
    logging.info("Start to test vehicle_or_not model")
    # Reshape the flat window features into (samples, 6 time steps, n_features)
    # for the LSTM input.
    features_test = np.reshape(
        np.array(vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index]),
        (len(vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index]), 6,
         len(features['VEHICLE_OR_NOT_FEATURES'])))
    evaluation.evaluate_single_model(
        vehicle_or_not_model, opt['folder_name'], 'vehicle_or_not',
        features_test,
        np.array(vehicle_or_not_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~ train vehicle_type_model ~~~~~~~~~~~~~~~~
    if vehicle_type_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_type_train_opt['random_seed'])
    vehicle_type_train_win_df = train_df.copy()
    vehicle_type_test_win_df = test_df.copy()
    vehicle_type_index = preprocessing.get_feature_idx(
        features['VEHICLE_TYPE'], features['ALL_FEATURES'])
    vehicle_type_test_win_df = vehicle_type_test_win_df[
        (vehicle_type_test_win_df[opt['test_label_type']] == 4) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 3) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 2)]
    vehicle_type_train_win_df = vehicle_type_train_win_df[
        (vehicle_type_train_win_df[opt['train_label_type']] == 2) |
        (vehicle_type_train_win_df[opt['train_label_type']] == 3) |
        (vehicle_type_train_win_df[opt['train_label_type']] == 4)]
    preprocessing.reassign_label(vehicle_type_train_win_df,
                                 vehicle_type_test_win_df,
                                 [[2, 0], [3, 1], [4, 2]])
    logging.info("Start to train vehicle_type model")
    vehicle_type_model = train_models.train_lstm_model(
        vehicle_type_train_win_df.iloc[:, vehicle_type_index],
        vehicle_type_train_win_df[opt['train_label_type']],
        vehicle_type_train_opt)
    logging.info("Start to test vehicle_type model")
    features_test = np.reshape(
        np.array(vehicle_type_test_win_df.iloc[:, vehicle_type_index]),
        (len(vehicle_type_test_win_df.iloc[:, vehicle_type_index]), 6,
         len(features['VEHICLE_TYPE'])))
    evaluation.evaluate_single_model(
        vehicle_type_model, opt['folder_name'], 'vehicle_type', features_test,
        np.array(vehicle_type_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~~ get overall result ~~~~~~~~~~~~~~~~~~~
    overall_test_win_df = test_df.copy()
    overall_result_label = evaluation.evaluate_overall_lstm(
        vehicle_or_not_model, vehicle_type_model, overall_test_win_df,
        overall_test_win_df[opt['test_label_type']], vehicle_or_not_index,
        vehicle_type_index, opt['smooth_overall_result'])

    # ~~~~~~~~~~~~~~~~~ Save predicted result into csv for visualization ~~~~~~~~~~
    evaluation.save_predicted_result_in_csv(overall_result_label,
                                            overall_test_win_df,
                                            opt['folder_name'], 'overall',
                                            opt['test_label_type'])
    evaluation.save_write(opt['folder_name'])
    del overall_test_win_df, vehicle_or_not_test_win_df, vehicle_or_not_train_win_df, vehicle_or_not_index, \
        vehicle_type_model, vehicle_or_not_model, vehicle_type_train_win_df, vehicle_type_test_win_df, \
        vehicle_type_index
def dl_one_model_train_test(train_df, test_df, opt, one_model_train_opt,
                            train_test_opt, features):
    """Use DL to train the All-In-One model."""
    evaluation.init_write(opt, [one_model_train_opt, train_test_opt],
                          features['ONE_MODEL'], train_df, test_df)

    # ~~~~~~~~~~~~~~ train one_model model ~~~~~~~~~~~~~~~~
    if one_model_train_opt['random_seed'] is not None:
        np.random.seed(one_model_train_opt['random_seed'])
    one_model_train_win_df = train_df.copy()
    one_model_test_win_df = test_df.copy()
    one_model_index = preprocessing.get_feature_idx(features['ONE_MODEL'],
                                                    features['ALL_FEATURES'])
    preprocessing.reassign_label(one_model_train_win_df, one_model_test_win_df,
                                 [[1, 0], [2, 1], [3, 2], [4, 3], [5, 0]])
    logging.info("Start to train one_model model")
    start_time = time.time()
    # features_test, labels_test = preprocessing.balance_dataset(
    #     one_model_test_win_df.iloc[:, one_model_index],
    #     one_model_test_win_df[opt['test_label_type']])
    one_model_model = train_models.train_dl_veh_type_model(
        one_model_train_win_df.iloc[:, one_model_index],
        one_model_train_win_df[opt['train_label_type']], one_model_train_opt,
        one_model_test_win_df.iloc[:, one_model_index],
        one_model_test_win_df[opt['test_label_type']])
    logging.info("Finished training one_model")
    print("it took", time.time() - start_time, "seconds.")

    logging.info("Start to test one_model model")
    if one_model_train_opt['DLNetwork'] == 'LSTM':
        features_test = np.reshape(
            np.array(one_model_test_win_df.iloc[:, one_model_index]),
            (len(one_model_test_win_df.iloc[:, one_model_index]), 6,
             len(features['ONE_MODEL'])))
        evaluation.evaluate_single_model(
            one_model_model, opt['folder_name'], 'one_model', features_test,
            np.array(one_model_test_win_df[opt['test_label_type']]),
            save_model=False)
    else:
        valid_all_based = preprocessing.remove_mix(one_model_test_win_df,
                                                   'all_based_win_label')
        evaluation.evaluate_single_model(
            one_model_model, opt['folder_name'], 'one_model',
            np.array(valid_all_based.iloc[:, one_model_index]),
            np.array(valid_all_based['all_based_win_label']),
            save_model=False)
        evaluation.evaluate_single_model(
            one_model_model, opt['folder_name'], 'one_model',
            np.array(one_model_test_win_df.iloc[:, one_model_index]),
            np.array(one_model_test_win_df['last_based_win_label']),
            save_model=False)
    evaluation.save_write(opt['folder_name'])
def dl_train_test_3binary(train_df, test_df, opt, vehicle_or_not_train_opt,
                          vehicle_type_train_opt, bus_or_not_train_opt,
                          mrt_or_car_train_opt, train_test_opt, features):
    """Use DL to train the bi-bi-binary model (three cascaded binary classifiers)."""
    evaluation.init_write(opt, [
        vehicle_or_not_train_opt, vehicle_type_train_opt, bus_or_not_train_opt,
        mrt_or_car_train_opt, train_test_opt
    ], [
        features['VEHICLE_OR_NOT_FEATURES'], features['BUS_OR_NOT'],
        features['MRT_OR_CAR']
    ], train_df, test_df)

    # ~~~~~~~~~~~~~~ train vehicle/Non-vehicle model ~~~~~~~~~~~~~~~~
    if vehicle_or_not_train_opt['random_seed'] is not None:
        np.random.seed(vehicle_or_not_train_opt['random_seed'])
    vehicle_or_not_train_win_df = train_df.copy()
    vehicle_or_not_test_win_df = test_df.copy()
    vehicle_or_not_index = preprocessing.get_feature_idx(
        features['VEHICLE_OR_NOT_FEATURES'], features['ALL_FEATURES'])
    preprocessing.reassign_label(vehicle_or_not_train_win_df,
                                 vehicle_or_not_test_win_df,
                                 [[1, 0], [2, 1], [3, 1], [4, 1], [5, 0]])
    logging.info("Start to train vehicle_or_not model")
    # features_test, labels_test = preprocessing.balance_dataset(
    #     vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index],
    #     vehicle_or_not_test_win_df[opt['test_label_type']])
    vehicle_or_not_model = train_models.train_dl_veh_or_not_model(
        vehicle_or_not_train_win_df.iloc[:, vehicle_or_not_index],
        vehicle_or_not_train_win_df[opt['train_label_type']],
        vehicle_or_not_train_opt,
        vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index],
        vehicle_or_not_test_win_df[opt['test_label_type']])
    logging.info("Start to test vehicle_or_not model")
    evaluation.evaluate_single_model(
        vehicle_or_not_model, opt['folder_name'], 'vehicle_or_not',
        np.array(vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index]),
        np.array(vehicle_or_not_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~ train bus_or_not_model ~~~~~~~~~~~~~~~~~~
    if bus_or_not_train_opt['random_seed'] is not None:
        np.random.seed(bus_or_not_train_opt['random_seed'])
    bus_or_not_train_win_df = train_df.copy()
    bus_or_not_test_win_df = test_df.copy()
    bus_or_not_index = preprocessing.get_feature_idx(features['BUS_OR_NOT'],
                                                     features['ALL_FEATURES'])
    bus_or_not_test_win_df = bus_or_not_test_win_df[
        (bus_or_not_test_win_df[opt['test_label_type']] == 4) |
        (bus_or_not_test_win_df[opt['test_label_type']] == 3) |
        (bus_or_not_test_win_df[opt['test_label_type']] == 2)]
    bus_or_not_train_win_df = bus_or_not_train_win_df[
        (bus_or_not_train_win_df[opt['train_label_type']] == 2) |
        (bus_or_not_train_win_df[opt['train_label_type']] == 3) |
        (bus_or_not_train_win_df[opt['train_label_type']] == 4)]
    # Raw labels: bus (3) -> 1, MRT (2) and car (4) -> 0.
    preprocessing.reassign_label(bus_or_not_train_win_df,
                                 bus_or_not_test_win_df,
                                 [[2, 0], [3, 1], [4, 0]])
    logging.info("Start to train bus_or_not model")
    # features_test, labels_test = preprocessing.balance_dataset(
    #     bus_or_not_test_win_df.iloc[:, bus_or_not_index],
    #     bus_or_not_test_win_df[opt['test_label_type']])
    bus_or_not_model = train_models.train_dl_veh_type_model(
        bus_or_not_train_win_df.iloc[:, bus_or_not_index],
        bus_or_not_train_win_df[opt['train_label_type']], bus_or_not_train_opt,
        bus_or_not_test_win_df.iloc[:, bus_or_not_index],
        bus_or_not_test_win_df[opt['test_label_type']])
    logging.info("Start to test bus_or_not model")
    evaluation.evaluate_single_model(
        bus_or_not_model, opt['folder_name'], 'bus_or_not',
        np.array(bus_or_not_test_win_df.iloc[:, bus_or_not_index]),
        np.array(bus_or_not_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~ train mrt_or_car_model ~~~~~~~~~~~~~~~~
    if mrt_or_car_train_opt['random_seed'] is not None:
        np.random.seed(mrt_or_car_train_opt['random_seed'])
    mrt_or_car_train_win_df = train_df.copy()
    mrt_or_car_test_win_df = test_df.copy()
    mrt_or_car_index = preprocessing.get_feature_idx(features['VEHICLE_TYPE'],
                                                     features['ALL_FEATURES'])
    mrt_or_car_test_win_df = mrt_or_car_test_win_df[
        (mrt_or_car_test_win_df[opt['test_label_type']] == 4) |
        (mrt_or_car_test_win_df[opt['test_label_type']] == 2)]
    mrt_or_car_train_win_df = mrt_or_car_train_win_df[
        (mrt_or_car_train_win_df[opt['train_label_type']] == 2) |
        (mrt_or_car_train_win_df[opt['train_label_type']] == 4)]
    # Raw labels: MRT (2) -> 0, car (4) -> 1.
    preprocessing.reassign_label(mrt_or_car_train_win_df,
                                 mrt_or_car_test_win_df, [[2, 0], [4, 1]])
    logging.info("Start to train mrt_or_car model")
    # features_test, labels_test = preprocessing.balance_dataset(
    #     mrt_or_car_test_win_df.iloc[:, mrt_or_car_index],
    #     mrt_or_car_test_win_df[opt['test_label_type']])
    mrt_or_car_model = train_models.train_dl_veh_type_model(
        mrt_or_car_train_win_df.iloc[:, mrt_or_car_index],
        mrt_or_car_train_win_df[opt['train_label_type']], mrt_or_car_train_opt,
        mrt_or_car_test_win_df.iloc[:, mrt_or_car_index],
        mrt_or_car_test_win_df[opt['test_label_type']])
    logging.info("Start to test mrt_or_car model")
    evaluation.evaluate_single_model(
        mrt_or_car_model, opt['folder_name'], 'mrt_or_car',
        np.array(mrt_or_car_test_win_df.iloc[:, mrt_or_car_index]),
        np.array(mrt_or_car_test_win_df[opt['test_label_type']]),
        save_model=False)

    # ~~~~~~~~~~~~~~~~~ get overall result ~~~~~~~~~~~~~~~~~~~
    overall_test_win_df = test_df.copy()
    overall_result_label = evaluation.evaluate_overall_bibibinary(
        vehicle_or_not_model, bus_or_not_model, mrt_or_car_model,
        overall_test_win_df, overall_test_win_df[opt['test_label_type']],
        vehicle_or_not_index, bus_or_not_index, mrt_or_car_index,
        opt['smooth_overall_result'])

    # ~~~~~~~~~~~~~~~~~ Save predicted result into csv for visualization ~~~~~~~~~~
    evaluation.save_predicted_result_in_csv(overall_result_label,
                                            overall_test_win_df,
                                            opt['folder_name'], 'overall',
                                            opt['test_label_type'])
    evaluation.save_write(opt['folder_name'])
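# Illustrative sketch (an assumption, not the actual evaluation.evaluate_overall_bibibinary):
# the bi-bi-binary cascade first asks vehicle-or-not, then bus-or-not, then
# mrt-or-car, and maps the leaf decisions back to the four-class scheme used by the
# All-In-One model above (0 = not vehicle, 1 = mrt, 2 = bus, 3 = car). How the real
# function combines and smooths window predictions may differ.
def _bibibinary_cascade_sketch(veh_or_not_pred, bus_or_not_pred, mrt_or_car_pred):
    """Combine three binary window predictions into a single overall class."""
    if veh_or_not_pred == 0:      # non-vehicle branch
        return 0
    if bus_or_not_pred == 1:      # bus branch
        return 2
    return 1 if mrt_or_car_pred == 0 else 3   # mrt vs car branch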
def ml_hie_train_test(ml_opt, train_df, test_df, opt, features):
    """Use ML to train the hierarchical model."""
    evaluation.init_write(
        opt, None,
        [features['VEHICLE_OR_NOT_FEATURES'], features['VEHICLE_TYPE']],
        train_df, test_df)

    # ~~~~~~~~~~~~~~ train vehicle/Non-vehicle model ~~~~~~~~~~~~~~~~
    vehicle_or_not_train_win_df = train_df.copy()
    vehicle_or_not_test_win_df = test_df.copy()
    vehicle_or_not_index = preprocessing.get_feature_idx(
        features['VEHICLE_OR_NOT_FEATURES'], features['ALL_FEATURES'])
    preprocessing.reassign_label(vehicle_or_not_train_win_df,
                                 vehicle_or_not_test_win_df,
                                 [[1, 0], [2, 1], [3, 1], [4, 1], [5, 0]])
    logging.info("Start to train vehicle_or_not model")
    vehicle_or_not_model = train_models.train_ml_model(
        np.array(vehicle_or_not_train_win_df.iloc[:, vehicle_or_not_index]),
        np.array(vehicle_or_not_train_win_df[opt['train_label_type']]), ml_opt)
    logging.info("Start to evaluate vehicle_or_not model")
    evaluation.evaluate_single_ml_model(
        vehicle_or_not_model,
        np.array(vehicle_or_not_test_win_df.iloc[:, vehicle_or_not_index]),
        np.array(vehicle_or_not_test_win_df[opt['test_label_type']]),
        ['not vehicle', 'vehicle'], opt['folder_name'])

    # ~~~~~~~~~~~~~~~~ train vehicle_type_model ~~~~~~~~~~~~~~~~
    vehicle_type_train_win_df = train_df.copy()
    vehicle_type_test_win_df = test_df.copy()
    vehicle_type_index = preprocessing.get_feature_idx(
        features['VEHICLE_TYPE'], features['ALL_FEATURES'])
    vehicle_type_test_win_df = vehicle_type_test_win_df[
        (vehicle_type_test_win_df[opt['test_label_type']] == 4) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 3) |
        (vehicle_type_test_win_df[opt['test_label_type']] == 2)]
    vehicle_type_train_win_df = vehicle_type_train_win_df[
        (vehicle_type_train_win_df[opt['train_label_type']] == 2) |
        (vehicle_type_train_win_df[opt['train_label_type']] == 3) |
        (vehicle_type_train_win_df[opt['train_label_type']] == 4)]
    preprocessing.reassign_label(vehicle_type_train_win_df,
                                 vehicle_type_test_win_df,
                                 [[2, 0], [3, 1], [4, 2]])
    logging.info("Start to train vehicle_type model")
    vehicle_type_model = train_models.train_ml_model(
        np.array(vehicle_type_train_win_df.iloc[:, vehicle_type_index]),
        np.array(vehicle_type_train_win_df[opt['train_label_type']]), ml_opt)
    logging.info("Start to evaluate vehicle_type model")
    evaluation.evaluate_single_ml_model(
        vehicle_type_model,
        np.array(vehicle_type_test_win_df.iloc[:, vehicle_type_index]),
        np.array(vehicle_type_test_win_df[opt['test_label_type']]),
        ['mrt', 'bus', 'car'], opt['folder_name'])

    # ~~~~~~~~~~~~~~~~~ get overall result ~~~~~~~~~~~~~~~~~~~
    overall_test_win_df = test_df.copy()
    valid_all_based = preprocessing.remove_mix(overall_test_win_df,
                                               'all_based_win_label')
    evaluation.evaluate_overall_manual_2(
        vehicle_or_not_model, vehicle_type_model, valid_all_based,
        valid_all_based['all_based_win_label'], vehicle_or_not_index,
        vehicle_type_index, opt['smooth_overall_result'])
    evaluation.evaluate_overall_manual_2(
        vehicle_or_not_model, vehicle_type_model, overall_test_win_df,
        overall_test_win_df['last_based_win_label'], vehicle_or_not_index,
        vehicle_type_index, opt['smooth_overall_result'])

    # ~~~~~~~~~~~~~~~~~ Save predicted result into csv for visualization ~~~~~~~~~~
    # evaluation.save_predicted_result_in_csv(overall_result_label, overall_test_win_df, opt['folder_name'],
    #                                         'overall', opt['test_label_type'])
    evaluation.save_write(opt['folder_name'])
    # print(evaluation.evaluation_report.write)
    del vehicle_or_not_test_win_df, vehicle_or_not_train_win_df, vehicle_type_train_win_df, vehicle_type_test_win_df, \
        vehicle_type_model, overall_test_win_df, vehicle_or_not_model, vehicle_type_index, vehicle_or_not_index
def predict_from_raw_df(features, opt, test_date, test_nid):
    """Load (or build) the windowed dataframe for one device/day, predict modes
    with the saved hierarchical models, and optionally save results to the DB."""
    save_to_db = False
    predict_by_model = True
    folder = './data/data_frame_from_IHPC/processed_df/'
    file_name = test_nid + '_' + test_date + '.csv'
    try:
        all_files = os.listdir(folder)
    except FileNotFoundError:
        if not os.path.exists(folder):
            os.makedirs(folder)
        all_files = os.listdir(folder)
    if file_name in all_files:
        # pd.DataFrame.from_csv is deprecated/removed; read_csv with index_col=0 is the equivalent.
        data_frame_full = pd.read_csv(folder + file_name, index_col=0)
    else:
        raw_df, labelled_df_list = get_raw_data_from_csv(test_nid, test_date)
        logging.info("Assigning Labels")
        data_frame_full = assign_app_label(raw_df, labelled_df_list)
        if data_frame_full is None:
            logging.warning(
                "No Dataframes with assigned labels, nid: %s date: %s" %
                (test_nid, test_date))
            return
        logging.info("Feature Calculation and Normalization")
        normalized_df = all_feature_calculation_normalization(
            data_frame_full, features['ALL_FEATURES'], test_date)
        logging.info("Converting to window dataframe")
        data_frame_full = preprocessing.get_win_df(
            [Trip(normalized_df, 1, test_nid)], features['ALL_FEATURES'])
        del normalized_df
        if not os.path.exists(folder):
            os.makedirs(folder)
        data_frame_full.to_csv(folder + file_name)

    if predict_by_model:
        data_frame_full = preprocessing.remove_mix(data_frame_full,
                                                   opt['test_label_type'])
        preprocessing.reassign_label(data_frame_full, data_frame_full,
                                     [[0, 5], [1, 5]])
        logging.info("Loading models")
        vehicle_type_model = load_model(
            './evaluation_report_testing_only/google_train_app_test/'
            'google_train_app_test/17-05-15 16:13/', "vehicle_type_model")
        vehicle_or_not_model = \
            load_model('./evaluation_report_testing_only/google_train_app_test/'
                       'google_train_app_test/17-05-15 16:13/',
                       "vehicle_or_not_model")

        logging.info("Getting the result of vehicle_or_not model")
        vehicle_or_not_test_df = data_frame_full.copy()
        vehicle_or_not_index = preprocessing.get_feature_idx(
            features['VEHICLE_OR_NOT_FEATURES'], features['ALL_FEATURES'])
        preprocessing.reassign_label(vehicle_or_not_test_df,
                                     vehicle_or_not_test_df,
                                     [[1, 0], [2, 1], [3, 1], [4, 1], [5, 0]])
        logging.info("Start to evaluate vehicle_or_not model")
        evaluation.evaluate_single_model(
            vehicle_or_not_model, opt['folder_name'], 'model_name',
            np.array(vehicle_or_not_test_df.iloc[:, vehicle_or_not_index]),
            np.array(vehicle_or_not_test_df[opt['test_label_type']]),
            save_model=False, num_classes=2)

        logging.info("Getting the result of vehicle_type model")
        vehicle_type_test_df = data_frame_full.copy()
        vehicle_type_index = preprocessing.get_feature_idx(
            features['VEHICLE_TYPE'], features['ALL_FEATURES'])
        vehicle_type_test_df = vehicle_type_test_df[
            (vehicle_type_test_df[opt['test_label_type']] == 4) |
            (vehicle_type_test_df[opt['test_label_type']] == 3) |
            (vehicle_type_test_df[opt['test_label_type']] == 2)]
        preprocessing.reassign_label(vehicle_type_test_df, vehicle_type_test_df,
                                     [[2, 0], [3, 1], [4, 2]])
        logging.info("Start to evaluate vehicle_type model")
        evaluation.evaluate_single_model(
            vehicle_type_model, opt['folder_name'], 'model_name',
            np.array(vehicle_type_test_df.iloc[:, vehicle_type_index]),
            np.array(vehicle_type_test_df[opt['test_label_type']]),
            save_model=False, num_classes=3)

        # ~~~~~~~~~~~~~~~~~ get overall result ~~~~~~~~~~~~~~~~~~~
        overall_test_win_df = data_frame_full.copy()
        overall_result_label = evaluation.evaluate_overall_manual_2(
            vehicle_or_not_model, vehicle_type_model, overall_test_win_df,
            overall_test_win_df[opt['test_label_type']], vehicle_or_not_index,
            vehicle_type_index, opt['smooth_overall_result'])

        # ~~~~~~~~~~~~~~~~~ Save predicted result into csv for visualization ~~~~~~~~~~
        evaluation.save_predicted_result_in_csv(overall_result_label,
                                                overall_test_win_df,
                                                opt['folder_name'], 'overall',
                                                opt['test_label_type'])
        evaluation.save_write(opt['folder_name'])

    if save_to_db:
        poi_latlon_heu = tripParse.detectPOI_geov(data_frame_full,
                                                  params.stopped_thresh,
                                                  params.poi_min_dwell_time,
                                                  params.loc_round_decimals)
        # Combine the detected POIs using DBSCAN
        pois_latlon_raw = np.array(poi_latlon_heu)
        logging.info("raw POIs: " + str(pois_latlon_raw))
        pois_latlon_comb = []
        if len(pois_latlon_raw) > 0:
            core_samples_mask, labels = apply_DBSCAN(pois_latlon_raw,
                                                     params.poi_comb_range,
                                                     params.poi_comb_samples)
            unique_labels = np.unique(labels)
            logging.info("labels when combining: " + str(labels))
            for unique_label in unique_labels:
                if not unique_label == -1:
                    cur_lat_mean, cur_lon_mean = \
                        np.mean(pois_latlon_raw[(labels == unique_label) & core_samples_mask, :], 0)
                    pois_latlon_comb.append([
                        float(round(cur_lat_mean, params.loc_round_decimals)),
                        float(round(cur_lon_mean, params.loc_round_decimals))
                    ])
            for idx, label in enumerate(labels):
                if label == -1:
                    pois_latlon_comb.append([
                        float(pois_latlon_raw[idx, 0]),
                        float(pois_latlon_raw[idx, 1])
                    ])
        logging.info("combined POIs: " + str(pois_latlon_comb))

        # identify home & school from the POIs
        home_loc, school_loc, pois_label_temp = \
            tripParse.identify_home_school(pois_latlon_comb, data_frame_full,
                                           school_start=params.school_start,
                                           school_end=params.school_end,
                                           home_start=params.home_start,
                                           home_end=params.home_end,
                                           min_school_thresh=params.min_school_thresh,
                                           poi_cover_radius=params.poi_cover_radius)
        logging.info("Temporary labels of POIs: " + str(pois_label_temp))

        # label all points based on the home/school & POI location
        pois_dict = \
            tripParse.label_pts_by_pois(pois_latlon_comb, pois_label_temp,
                                        data_frame_full,
                                        home_cover_radius=params.home_cover_radius,
                                        sch_cover_radius=params.sch_cover_radius,
                                        poi_cover_radius=params.poi_cover_radius,
                                        poi_min_dwell_time=params.poi_min_dwell_time)
        logging.info("Chronological POIs: " + str(pois_dict['pois_latlon_chro']))
        logging.info("Chronological POI labels: " + str(pois_dict['pois_label_chro']))

        # take out trips and add trip label to the data frame
        trips_dict = {
            'trip_num': [],
            'start_poi_loc': [],
            'end_poi_loc': [],
            'tot_dist(km)': [],
            'tot_dura(s)': [],
            'start_sgt': [],
            'end_sgt': [],
            'tot_num_trips': 0,
            'nid': test_nid,
            'analyzed_date': test_date,
            'home_loc': home_loc,
            'school_loc': school_loc,
            'valid_loc_perc': [],
            'num_pt': []
        }
        trip_labels = np.array([None] * len(data_frame_full))
        # chunks of the poi label; -1 chunks are trips
        poi_label_chunks = tripParse.chunks_real(
            data_frame_full['POI_LABEL'].values.tolist(), include_values=True)
        logging.info("Chronological chunks of poi labels: " +
                     str(poi_label_chunks))
        trip_num = 0
        start_poi_num = 0
        end_poi_num = 1
        for idx, label_chunk in enumerate(poi_label_chunks):
            # go through each trip chunk and collect trip information
            if label_chunk[2] == -1:  # if it's a trip chunk
                trip_num += 1
                trips_dict['trip_num'].append(trip_num)
                trip_labels[label_chunk[0]:label_chunk[1]] = trip_num
                if idx == 0:  # if there's no start poi
                    trips_dict['start_poi_loc'].append([])
                    end_poi_num -= 1
                else:
                    trips_dict['start_poi_loc'].append(
                        pois_dict['pois_latlon_chro'][start_poi_num])
                    start_poi_num += 1
                if idx == len(poi_label_chunks) - 1:  # if there's no end poi
                    trips_dict['end_poi_loc'].append([])
                else:
                    trips_dict['end_poi_loc'].append(
                        pois_dict['pois_latlon_chro'][end_poi_num])
                    end_poi_num += 1
                trips_dict['tot_dist(km)'].append(
                    round(
                        np.nansum(data_frame_full['DISTANCE_DELTA']
                                  [label_chunk[0]:label_chunk[1]].values) / 1000,
                        params.dist_round_decimals))
                trips_dict['tot_dura(s)'].\
                    append(int(np.nansum(data_frame_full['TIME_DELTA'][label_chunk[0]:label_chunk[1]].values)))
                trips_dict['start_sgt'].append(
                    round(data_frame_full['TIME_SGT'][label_chunk[0]], 3))
                trips_dict['end_sgt'].append(
                    round(data_frame_full['TIME_SGT'][label_chunk[1] - 1], 3))
                cur_lat = data_frame_full['WLATITUDE'][
                    label_chunk[0]:label_chunk[1]].values
                trips_dict['valid_loc_perc'].append(
                    len(cur_lat[~np.isnan(cur_lat)]) * 1.0 / len(cur_lat))
                trips_dict['num_pt'].append(label_chunk[1] - label_chunk[0])
        trips_dict['tot_num_trips'] = trip_num
        data_frame_full['TRIP_LABEL'] = pd.Series(
            trip_labels)  # trip_labels are 1, 2, 3, 4, ...

        # con_com = """dbname='nse_mode_id' user='******' password='******' host='localhost'"""
        con_com = """dbname='""" + params.dbname_str + """' user='******' password='******' host='""" + \
                  params.dbhost + """' port ='""" + params.dbport + """' """
        conn_psql = psycopg2.connect(con_com)
        cursor_psql = conn_psql.cursor()

        # save extra labels and features into the PSQL DB table
        logging.warning(
            "Starting to save extra labels and features into the PSQL DB table")
        save_extra = save_extra_PSQL_2016(conn_psql, cursor_psql,
                                          params.tableExtra2016,
                                          data_frame_full)
        logging.warning("Extra columns saving status: " + str(save_extra))

        # save trip dictionary into the PSQL DB table
        if trips_dict['tot_num_trips'] > 0:
            logging.warning(
                "Start to save trip dictionary into the PSQL DB table")
            save_trips = save_tripsummary_PSQL_2016(conn_psql, cursor_psql,
                                                    params.tableTrip2016,
                                                    params.tableExtra2016,
                                                    trips_dict)
            logging.warning("Trip summary saving status: " + str(save_trips))
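# Minimal usage sketch (hypothetical values; the real option and feature dicts are
# built elsewhere in this project). It only illustrates the shape of the `opt`,
# `ml_opt`, and `features` arguments the train/test functions above expect; the
# feature names and ml_opt keys below are placeholders, not the project's actual ones.
if __name__ == '__main__':
    example_features = {
        'ALL_FEATURES': ['MEAN_SPEED', 'STD_ACC', 'MEAN_MAG'],       # hypothetical names
        'VEHICLE_OR_NOT_FEATURES': ['MEAN_SPEED', 'STD_ACC'],
        'VEHICLE_TYPE': ['MEAN_SPEED', 'MEAN_MAG'],
        'ONE_MODEL': ['MEAN_SPEED', 'STD_ACC', 'MEAN_MAG'],
    }
    example_opt = {
        'folder_name': './evaluation_report_testing_only/example_run/',
        'train_label_type': 'all_based_win_label',
        'test_label_type': 'all_based_win_label',
        'smooth_overall_result': False,
    }
    example_ml_opt = {'model': 'RandomForest'}                        # hypothetical key
    # train_win_df / test_win_df would come from preprocessing.get_win_df(...), e.g.:
    # ml_hie_train_test(example_ml_opt, train_win_df, test_win_df, example_opt, example_features)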