def run_test(self):
    driver_source = Driver.DriverSource()
    main_driver = driver_source.get_driver(1081)
    # plot tangential-acceleration magnitude against speed for five random trips
    for _ in range(5):
        trip = main_driver.get_data_random_trip()
        plotTripData(trip.tang_accel_mag, trip.speed)
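# plotTripData is not defined in this file. A minimal sketch of such a helper,
# assuming matplotlib and that tang_accel_mag and speed are equal-length 1-D
# arrays indexed by time step (names and layout are illustrative):
#
#     import matplotlib.pyplot as plt
#
#     def plotTripData(tang_accel_mag, speed):
#         # two stacked panels sharing the time axis
#         fig, (ax1, ax2) = plt.subplots(2, sharex=True)
#         ax1.plot(tang_accel_mag)
#         ax1.set_ylabel('tangential accel magnitude')
#         ax2.plot(speed)
#         ax2.set_ylabel('speed')
#         ax2.set_xlabel('time step')
#         plt.show()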
def run_one(self):
    """
    Use driver id ####: get all of its matching trips (~60) and split them
    40/20 into train/test. Get 60 random drivers, select a random trip from
    each, and again split 40/20.

    For each trip, extract: mean_speed_to_stop, mean_speed_from_stop,
    mean_accel_inout_bends, mean_trip_speed.

    Run an SVM with an RBF kernel, using 4-fold cross-validation to obtain
    gamma and C. See what we get with the model on the test set. If it
    exceeds 90, go for it with additional drivers.
    :return:
    """
    # minimum number of trips to process
    MIN_MATCH = 100

    # get a particular driver, obtain his/her trips, then find the ones that match
    driver_source = Driver.DriverSource()
    main_driver = driver_source.get_driver(2053)
    trip_list = main_driver.get_all_trips()
    trips_to_process = main_driver.get_matching_trips()

    # top up the trip collection with random non-matching trips from the
    # same driver if there are too few matches
    record_count = len(trips_to_process)
    print 'Matching Records:', record_count
    if record_count < MIN_MATCH:
        trip_ids_now = [trip.Id for trip in trips_to_process]
        other_trips = [trip for trip in trip_list if trip.Id not in trip_ids_now]
        extra_trips = rnd.sample(other_trips, MIN_MATCH - record_count)
        trips_to_process.extend(extra_trips)
        record_count = len(trips_to_process)

    # get a random other driver, making sure it is not this one
    rand_drivers = driver_source.get_rand_drivers(1)
    while rand_drivers[0].id() == main_driver.id():
        rand_drivers = driver_source.get_rand_drivers(1)

    # add in an equal number of trips from the random driver
    rand_trips = rand_drivers[0].get_all_trips()
    trips_to_process.extend(rnd.sample(rand_trips, record_count))

    # classify the trips (set y): True for the main driver's trips, False otherwise
    targ = np.full((len(trips_to_process), 1), False, dtype=bool)
    targ[0:record_count] = True
    target = targ.flatten()

    # create an array to hold the features that will be calculated
    trips_features = np.zeros(shape=[len(trips_to_process), 11], dtype=float)

    # calculate and add in the features...
    for idx, trip in enumerate(trips_to_process):
        mean_speed, mean_accel, mean_decel = trip.mean_speed_accel_decel()
        trips_features[idx, 0] = mean_speed
        trips_features[idx, 1] = mean_accel
        trips_features[idx, 2] = mean_decel
        std_speed, std_accel, std_decel = trip.std_speed_accel_decel()
        trips_features[idx, 3] = std_speed
        trips_features[idx, 4] = std_accel
        trips_features[idx, 5] = std_decel
        m_ta_l, s_ta_l = trip.mean_std_tang_accel_left()
        trips_features[idx, 6] = m_ta_l
        trips_features[idx, 7] = s_ta_l
        trips_features[idx, 8] = trip.accel_change_per_meter()
        m_ac, s_ac = trip.mean_std_accel_changes()
        trips_features[idx, 9] = m_ac
        trips_features[idx, 10] = s_ac

    # set up our split for train/cross-validation and test
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        trips_features, target, test_size=0.4)

    # set up our grid to find the optimal parameters
    param_grid = [{'learning_rate': [0.01, 0.1, 1],
                   'n_estimators': [100, 200],
                   'max_depth': [1, 2, 3]}]

    top_score = 0.0
    best_params = None
    scores = ['precision', 'recall']
    for score in scores:
        clf = GridSearchCV(GradientBoostingClassifier(), param_grid, scoring=score)
        clf.fit(X_train, y_train)

        print("Best parameters set found on training set:")
        print
        print(clf.best_estimator_)
        print

        # use a name other than 'scores' here so the outer loop's list of
        # scoring metrics is not shadowed
        for params, mean_score, cv_scores in clf.grid_scores_:
            if mean_score > top_score:
                top_score = mean_score
                best_params = params
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean_score, cv_scores.std() / 2, params))
        print

        print("Detailed classification report:")
        print
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print

    print
    print best_params
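# The docstring above describes an RBF-kernel SVM tuned by 4-fold
# cross-validation for gamma and C, while the body grid-searches a
# GradientBoostingClassifier instead. A minimal sketch of the described SVM
# search, assuming the same old-style sklearn API used elsewhere in this file
# (the parameter ranges here are illustrative, not tuned values):
#
#     svm_grid = [{'C': [0.1, 1, 10, 100],
#                  'gamma': [0.0001, 0.001, 0.01, 0.1]}]
#     svm_clf = GridSearchCV(SVC(kernel='rbf'), svm_grid, cv=4,
#                            scoring='precision')
#     svm_clf.fit(X_train, y_train)
#     print(svm_clf.best_estimator_)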
def run_process(driver_list, fname):
    FEATURE_COUNT = 20
    RAND_DRIVER_COUNT = 5

    driver_source = Driver.DriverSource()
    for the_driver in driver_list:
        main_driver = driver_source.get_driver(the_driver)
        trip_list = main_driver.get_trips()
        record_count = len(trip_list)

        # set up the feature and target arrays; the first record_count
        # targets (the main driver's trips) are set to 1
        trips_features = np.zeros(shape=[2 * record_count, FEATURE_COUNT], dtype=float)
        targ = np.zeros(shape=[2 * record_count, 1], dtype=int)
        targ[0:record_count] = 1
        target = targ.flatten()

        # fill out the feature array for the main driver: four percentiles
        # each of accel, decel, speed, delta-accel and tangential accel
        for idx, trip in enumerate(trip_list):
            trips_features[idx, 0:4] = trip.percentiles_accel()[0:4]
            trips_features[idx, 4:8] = trip.percentiles_decel()[0:4]
            trips_features[idx, 8:12] = trip.percentiles_speed()[0:4]
            trips_features[idx, 12:16] = trip.percentiles_delta_accel()[0:4]
            trips_features[idx, 16:20] = trip.percentiles_tang_accel()[0:4]

        # now get a bunch of random drivers, excluding the main driver...
        rand_drivers = driver_source.get_rand_drivers(RAND_DRIVER_COUNT)
        while main_driver in rand_drivers:
            rand_drivers = driver_source.get_rand_drivers(RAND_DRIVER_COUNT)

        # for each of the random drivers, complete the second half of the
        # feature array with that driver's trips and train the model
        prob_array = np.zeros(shape=[record_count, RAND_DRIVER_COUNT], dtype=float)
        for col_idx, drv_rand in enumerate(rand_drivers):
            # bug fix: the negative examples must come from the random
            # driver's trips, not from the main driver's trip_list
            for i, trip in enumerate(drv_rand.get_trips()[:record_count]):
                idx = record_count + i
                trips_features[idx, 0:4] = trip.percentiles_accel()[0:4]
                trips_features[idx, 4:8] = trip.percentiles_decel()[0:4]
                trips_features[idx, 8:12] = trip.percentiles_speed()[0:4]
                trips_features[idx, 12:16] = trip.percentiles_delta_accel()[0:4]
                trips_features[idx, 16:20] = trip.percentiles_tang_accel()[0:4]

            # leftover GBM settings, unused by the SVC below:
            # learning_rate = 0.1, n_estimators = 200, max_depth = 2

            # scale the features, then fit an RBF-kernel SVM
            scaler = preprocessing.StandardScaler().fit(trips_features)
            X_train_scaled = scaler.transform(trips_features)
            #clf = RandomForestClassifier()
            clf = SVC(C=1, probability=True, kernel='rbf', gamma=0.001)
            clf.fit(X_train_scaled, target)

            # predict the probability that each of the main driver's trips
            # belongs to the main driver (class label 1)
            X_pred_scaled = scaler.transform(trips_features[0:record_count, :])
            probs = clf.predict_proba(X_pred_scaled)
            class_label_index = 1 if clf.classes_[1] == 1 else 0
            prob_array[:, col_idx] = probs[:, class_label_index]

        # average the probabilities across the random-driver models
        trip_probabilities = np.mean(prob_array, axis=1)
        print str(int(main_driver.id())) + ' - output to file....'
        # append one line per trip: "<driver_id>_<trip_id>, <probability>"
        with open(fname, 'a') as writer:
            for trip, prob in zip(trip_list, trip_probabilities):
                writer.write(str(int(main_driver.id())) + '_' +
                             str(int(trip.Id)) + ', ' + str(prob) + '\n')
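# A possible invocation, assuming driver ids are ints and the output file is
# a fresh CSV opened with a header row before run_process appends to it
# (the filename, header text, and driver ids here are illustrative):
#
#     if __name__ == '__main__':
#         out_file = 'submission.csv'
#         with open(out_file, 'w') as f:
#             f.write('driver_trip,prob\n')
#         run_process([1081, 2053], out_file)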