def model_data():
    """Train a k-nearest-neighbours classifier on the HMI training data.

    Reads ``hmitraningdata.csv`` from the ``hmitraningdata`` bucket through
    ``s3_bucket_function.s3BucketRead``, maps the numeric ``Target(Class)``
    codes to event names in a new ``class_name`` column, and holds out 30% of
    the rows as a test split (``random_state=101``). Every ``k`` from 1 up to
    ``int(sqrt(len(X_train))) - 1`` is scored on the held-out rows, and a
    classifier using the best-scoring ``k`` is fit on the training split.

    Returns:
        The fitted ``KNeighborsClassifier`` for the selected ``k``.

    Raises:
        ValueError: If the training split is too small to produce any
            candidate ``k`` (``int(sqrt(len(X_train)))`` is below 2).
    """
    training_data = s3_bucket_function.s3BucketRead(
        "hmitraningdata", "hmitraningdata.csv", "csv")
    training_data["class_name"] = training_data["Target(Class)"].map({
        1: "Sudden Acceleration",
        2: "Sudden Right Turn",
        3: "Sudden Left Turn",
        4: "Sudden Break",
    })

    # Columns 1..6 are the model inputs.
    # NOTE(review): presumably the gyro/accelerometer axes -- confirm against
    # the CSV layout.
    features = training_data.iloc[:, 1:7]
    labels = training_data["class_name"]
    X_train, X_test, Y_train, Y_test = train_test_split(
        features, labels, test_size=.30, random_state=101)

    # Candidate neighbour counts are capped by sqrt of the training size.
    max_k = int(np.sqrt(len(X_train)))
    candidate_ks = list(range(1, max_k))
    if not candidate_ks:
        raise ValueError(
            "not enough training rows to evaluate any value of k")

    accuracies = []
    for k in candidate_ks:
        learner = KNeighborsClassifier(n_neighbors=k).fit(X_train, Y_train)
        predicted = learner.predict(X_test)
        accuracies.append(accuracy_score(Y_test, predicted) * 100)

    # argmax yields a 0-based position in `accuracies`, whose first entry
    # belongs to k=1. Translate the position back into the actual k instead
    # of passing the position itself as n_neighbors.
    best_k = candidate_ks[int(np.argmax(accuracies))]

    return KNeighborsClassifier(n_neighbors=best_k).fit(X_train, Y_train)
def driver_trip_analysis_function(bucketName, driver_id_maxtimesatmp, filetype):
    """Classify every row of one driver trip file with ``knn_model``.

    Args:
        bucketName: Accepted for interface compatibility; not read by the body.
        driver_id_maxtimesatmp: Object key passed to ``s3BucketRead``.
        filetype: File type passed to ``s3BucketRead``.

    Returns:
        A 3-tuple ``(result_frame, trip_frame, predictions)`` where
        ``result_frame`` is a DataFrame with a single ``"Prediction"`` column,
        ``trip_frame`` is the frame that was read, and ``predictions`` is the
        raw output of ``knn_model.predict``.
    """
    # NOTE(review): the read always targets "hmimlbucket" and ignores
    # `bucketName` -- confirm whether that is intentional.
    trip_frame = s3_bucket_function.s3BucketRead(
        "hmimlbucket", driver_id_maxtimesatmp, filetype)

    # Model input is columns 7..9 followed by columns 4..6.
    # NOTE(review): presumably gyro then accelerometer, matching the training
    # feature order -- confirm.
    model_input = pd.concat(
        [trip_frame.iloc[:, 7:10], trip_frame.iloc[:, 4:7]], axis=1)

    predictions = knn_model.predict(model_input)
    return pd.DataFrame({"Prediction": predictions}), trip_frame, predictions
def real_data():
    """Classify the master data set and count the predicted classes.

    Reads the ``master`` object from the ``hmimaster`` bucket, builds the
    model input from columns 8..10 followed by columns 5..7, and runs it
    through the module-level ``knn_model``.

    Returns:
        A pandas Series produced by ``value_counts()`` on the predictions,
        i.e. the number of rows assigned to each predicted class. Previously
        this value was computed and then discarded (the function returned
        ``None``).
    """
    master_frame = s3_bucket_function.s3BucketRead(
        "hmimaster", "master", "csv")

    # NOTE(review): presumably gyro (8..10) then accelerometer (5..7), to
    # match the feature order the model was trained on -- confirm.
    gyro = master_frame.iloc[:, 8:11]
    acc = master_frame.iloc[:, 5:8]
    model_input = pd.concat([gyro, acc], axis=1)

    predictions = knn_model.predict(model_input)
    result_driver = pd.DataFrame({"Prediction": predictions})
    return result_driver["Prediction"].value_counts()
def calculate_all_raw_file():
    """Convert every raw file in ``febappuser`` and store it in ``febappcsv``.

    For each file name returned by
    ``s3_bucket_function.list_of_file_name_from_s3_bucket("febappuser")`` the
    file is read as ``"txt"``, converted with ``raw_to_csv`` and written to
    the ``febappcsv`` bucket. The output name is the concatenation of the
    first two "non-digits followed by digits" segments of the input name.

    Returns:
        dict mapping the 0-based position of each file in the listing to its
        converted data.

    Raises:
        IndexError: If a file name contains fewer than two
            "non-digits + digits" segments.
    """
    # Raw string: "\D" in a plain literal is an invalid escape sequence.
    name_pattern = re.compile(r'\D+[\d]+')

    file_list = s3_bucket_function.list_of_file_name_from_s3_bucket(
        "febappuser")
    all_cav_data = {}
    for index, raw_name in enumerate(file_list):
        raw = s3_bucket_function.s3BucketRead("febappuser", raw_name, "txt")
        converted = raw_to_csv(raw, raw_name)
        all_cav_data[index] = converted

        segments = name_pattern.findall(raw_name)
        csv_name = segments[0] + segments[1]
        s3_bucket_function.s3BucketWrite("febappcsv", csv_name, converted)
    return all_cav_data
def merge_result_file(result_file):
    """Merge per-trip result CSVs into one DataFrame.

    Each name in ``result_file`` is read from the
    ``hmiresultdriverbehaviourcsv`` bucket as ``"csv"`` and the frames are
    stacked row-wise. Original row indexes are kept (``ignore_index=False``),
    so index labels may repeat across files.

    Args:
        result_file: Sequence of object names to read and merge.

    Returns:
        DataFrame whose leading columns are the expected result columns in a
        fixed order, followed by any extra columns found in the inputs. The
        ``"Unnamed: 0"`` column is always removed. Expected columns missing
        from every input are present and filled with NaN. An empty
        ``result_file`` yields an empty frame with the expected columns.
    """
    column_names = [
        'Unnamed: 0', 'sudden_left_turn', 'sudden_acceleration',
        'sudden_break', 'sudden_right_turn', 'driver', 'trip_duration',
        'speed_max', 'speed_min', 'speed_avg', 'start_lat', 'start_lon',
        'end_lat', 'end_lon', 'start_address', 'end_address', 'idle_time',
        'final_score'
    ]

    frames = [
        s3_bucket_function.s3BucketRead(
            "hmiresultdriverbehaviourcsv", name, "csv")
        for name in result_file
    ]

    if frames:
        # DataFrame.append was removed in pandas 2.0; a single concat also
        # avoids re-copying the accumulated frame on every iteration.
        merged = pd.concat(frames, ignore_index=False, sort=False)
        # Keep the expected columns first and in their fixed order, then any
        # extras in the order they appeared in the inputs.
        extras = [col for col in merged.columns if col not in column_names]
        merged = merged.reindex(columns=column_names + extras)
    else:
        merged = pd.DataFrame(columns=column_names)

    return merged.drop(columns=["Unnamed: 0"])