def model_data():
    """Train a k-nearest-neighbours classifier on the labelled training set.

    Reads ``hmitraningdata.csv`` from the ``hmitraningdata`` bucket, maps the
    numeric ``Target(Class)`` codes 1-4 to event names, and holds out 30 % of
    the rows (fixed ``random_state`` so the split is reproducible). Every
    ``k`` from 1 up to, but excluding, ``int(sqrt(len(X_train)))`` is scored
    on the hold-out rows; the ``k`` with the highest accuracy wins (the
    smallest such ``k`` on ties).

    Returns:
        A ``KNeighborsClassifier`` fitted on the training split with the
        selected ``k``.
    """
    training_data = s3_bucket_function.s3BucketRead("hmitraningdata",
                                                    "hmitraningdata.csv",
                                                    "csv")
    training_data["class_name"] = training_data["Target(Class)"].map({
        1: "Sudden Acceleration",
        2: "Sudden Right Turn",
        3: "Sudden Left Turn",
        4: "Sudden Break",
    })

    # Columns 1..6 are used as features.
    # NOTE(review): presumably the six sensor axes -- confirm against the CSV.
    X_feature = training_data.iloc[:, 1:7]
    Y_class = training_data["class_name"]
    X_train, X_test, Y_train, Y_test = train_test_split(X_feature,
                                                        Y_class,
                                                        test_size=.30,
                                                        random_state=101)

    max_k = int(np.sqrt(len(X_train)))
    # Always evaluate at least k=1 so a very small training set does not
    # leave an empty candidate list.
    candidate_ks = range(1, max(max_k, 2))

    accuracies = []
    for k in candidate_ks:
        knn_learner = KNeighborsClassifier(n_neighbors=k).fit(X_train, Y_train)
        predicted = knn_learner.predict(X_test)
        accuracies.append(accuracy_score(Y_test, predicted) * 100)

    # argmax yields a 0-based position in ``accuracies``; translate it back
    # to the k it was measured for (position 0 is k=1), otherwise the chosen
    # model would be off by one and k=1 would become the invalid k=0.
    opt_k = candidate_ks[int(np.argmax(accuracies))]

    knn_selected = KNeighborsClassifier(n_neighbors=opt_k)
    knn_model = knn_selected.fit(X_train, Y_train)
    return knn_model
def driver_trip_analysis_function(bucketName, driver_id_maxtimesatmp,
                                  filetype):
    """Classify every row of one driver file with the KNN model.

    Args:
        bucketName: Not used by this function; the file is always read from
            the ``hmimlbucket`` bucket.
            NOTE(review): confirm whether this should select the bucket.
        driver_id_maxtimesatmp: Object name forwarded to ``s3BucketRead``.
        filetype: File type forwarded to ``s3BucketRead``.

    Returns:
        A 3-tuple ``(result, frame, predictions)``: a one-column
        ``"Prediction"`` DataFrame, the frame as read from the bucket, and
        the raw output of ``knn_model.predict``.
    """
    # ``knn_model`` is not defined in this function; it is looked up as a
    # global at call time.
    trip_frame = s3_bucket_function.s3BucketRead("hmimlbucket",
                                                 driver_id_maxtimesatmp,
                                                 filetype)

    # Feature layout: columns 7..9 first, then columns 4..6 (named Gyro and
    # Acc in the original code; presumably gyroscope and accelerometer axes).
    feature_frame = pd.concat(
        [trip_frame.iloc[:, 7:10], trip_frame.iloc[:, 4:7]], axis=1)

    predictions = knn_model.predict(feature_frame)
    prediction_frame = pd.DataFrame({"Prediction": predictions})

    return prediction_frame, trip_frame, predictions
def real_data():
    """Run the KNN model over the master data set and tally the predictions.

    Reads the ``master`` object from the ``hmimaster`` bucket, predicts a
    class for every row and counts how often each class occurs.

    NOTE(review): the tally is computed but never returned or stored, so
    this function returns ``None`` -- confirm whether a ``return`` is
    missing.
    """
    # ``knn_model`` is not defined in this function; it is looked up as a
    # global at call time.
    master_frame = s3_bucket_function.s3BucketRead("hmimaster", "master",
                                                   "csv")

    # Feature layout: columns 8..10 first, then columns 5..7 (named Gyro and
    # Acc in the original code).
    feature_frame = pd.concat(
        [master_frame.iloc[:, 8:11], master_frame.iloc[:, 5:8]], axis=1)

    predictions = knn_model.predict(feature_frame)
    prediction_frame = pd.DataFrame({"Prediction": predictions})

    result_hmi = prediction_frame["Prediction"].value_counts()
# Example #4
# 0
def calculate_all_raw_file():
    """Process every file in the ``febappuser`` bucket and upload the result.

    Each listed file is read as ``"txt"``, passed through ``raw_to_csv``
    together with its name, and the result is written to the ``febappcsv``
    bucket. The output name is the concatenation of the first two matches of
    "a run of non-digits followed by a run of digits" in the input name.

    Returns:
        dict mapping each file's position in the bucket listing to the value
        returned by ``raw_to_csv`` for it.

    Raises:
        IndexError: if a file name contains fewer than two matches of the
            naming pattern.
    """
    # Raw string: "\D" in a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions. Compiled once, outside
    # the loop.
    name_part = re.compile(r'\D+[\d]+')

    file_list = s3_bucket_function.list_of_file_name_from_s3_bucket(
        "febappuser")
    all_cav_data = {}
    for i, raw_name in enumerate(file_list):
        data = s3_bucket_function.s3BucketRead("febappuser", raw_name, "txt")
        all_cav_data[i] = raw_to_csv(data, raw_name)

        parts = name_part.findall(raw_name)
        file_name = parts[0] + parts[1]

        s3_bucket_function.s3BucketWrite("febappcsv", file_name,
                                         all_cav_data[i])

    return all_cav_data
def merge_result_file(result_file):
    """Merge per-trip result CSVs into a single DataFrame.

    Args:
        result_file: Sequence of object names in the
            ``hmiresultdriverbehaviourcsv`` bucket.

    Returns:
        A DataFrame with the rows of all files stacked in order, each file
        keeping its own row index. The known result columns come first (and
        are always present); any additional columns found in the files
        follow. A saved-index ``"Unnamed: 0"`` column is never included.
        With no input files an empty frame with the known columns is
        returned.
    """
    column_names = [
        'sudden_left_turn', 'sudden_acceleration', 'sudden_break',
        'sudden_right_turn', 'driver', 'trip_duration', 'speed_max',
        'speed_min', 'speed_avg', 'start_lat', 'start_lon', 'end_lat',
        'end_lon', 'start_address', 'end_address', 'idle_time',
        'final_score'
    ]

    frames = [
        s3_bucket_function.s3BucketRead("hmiresultdriverbehaviourcsv", name,
                                        "csv") for name in result_file
    ]
    if not frames:
        return pd.DataFrame(columns=column_names)

    # One concat instead of repeated DataFrame.append: append was removed in
    # pandas 2.0 and copied the accumulated frame on every iteration.
    merged = pd.concat(frames, ignore_index=False)

    # Dropping once with errors="ignore" avoids the KeyError the per-file
    # drop raised when a file had no "Unnamed: 0" column.
    merged = merged.drop(columns=["Unnamed: 0"], errors="ignore")

    # Keep the documented column layout: known columns first, extras after.
    extra_columns = [c for c in merged.columns if c not in column_names]
    return merged.reindex(columns=column_names + extra_columns)