            ),
            style={'textAlign': 'left'}),
        html.P(
            """
            This application extracts text from three different communication
            channels of one user, P-Allen. It then applies a statistical
            modeling technique to find interesting features. The table below
            shows the result and contains the 15 most important features for
            each channel.
            """
        )
    ]),
    html.Div(children=[
        html.P('Select a channel - sms: 1, emails: 2, chats: 3'),
        html.Div(
            [
                dcc.Dropdown(id='dropdown',
                             options=[
                                 {'label': i, 'value': i}
                                 for i in extract_features().channel_code.unique()
                             ],
                             multi=True,
                             placeholder='Filter by channel...'),
                html.Div(id='output_div')
            ],
            className="row"),
        html.Div(
            [
                dash_table.DataTable(id='table', columns=[])
            ],
            className="ten columns"),
        html.Div(
            [
                html.P('Behavox assignment - Developed by Mouhameth T. Faye ',
                       style={'display': 'inline'}),
                html.A('*****@*****.**', href='mailto: [email protected]')
            ],
            className="twelve columns",
            style={'fontSize': 14, 'padding-top': 18}
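The fragment above is only part of the layout tree. Below is a minimal sketch of how such a layout is typically attached to a Dash application; the app object, heading, and server call are assumptions, not taken from the source (on newer Dash versions, dcc and html are imported from the dash package itself).

import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_table

app = dash.Dash(__name__)

# Hypothetical wiring: the html.Div / dcc.Dropdown / dash_table.DataTable tree
# shown above would be nested inside this top-level container.
app.layout = html.Div(children=[
    html.H1('Feature explorer'),  # placeholder heading, not from the source
    # ... layout fragment from above goes here ...
])

if __name__ == '__main__':
    app.run_server(debug=True)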
def display_table(selector):
    # No channel selected: show the full feature table.
    if selector is None:
        return generate_table(extract_features())
    # Otherwise keep only rows whose channel_code matches a selected channel.
    features = extract_features()
    dff = features[features.channel_code.str.contains('|'.join(selector))]
    return generate_table(dff)
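display_table depends on a generate_table helper and a callback registration that are not shown in this excerpt. A minimal sketch, assuming generate_table builds a dash_table.DataTable from the DataFrame and the callback ties the dropdown to output_div:

from dash.dependencies import Input, Output

# Assumed helper (not the author's exact code): render a DataFrame as a DataTable.
def generate_table(df):
    return dash_table.DataTable(
        columns=[{'name': col, 'id': col} for col in df.columns],
        data=df.to_dict('records'))

# Assumed callback registration: display_table above would carry this decorator,
# given a Dash app object named `app`.
# @app.callback(Output('output_div', 'children'), [Input('dropdown', 'value')])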
            arr[i] = normalise(arr[i], -2000, 2000)
        elif i in range(9, 12):
            arr[i] = normalise(arr[i], -250000, 250000)
        elif i in range(12, 15):
            arr[i] = normalise(arr[i], -2000, 2000)
        elif i in range(15, 18):
            arr[i] = normalise(arr[i], -250000, 250000)

# Keep only the first 60 samples (one window) for testing.
data_to_test = []
for i in range(len(data_all)):
    if i in range(0, 60):
        # print(data_all[i])
        data_to_test.append(data_all[i])
print(len(data_to_test[0]))

# Extract one feature row from the window.
features = []
data_line = extract_features(np.asarray(data_to_test))
features.append(data_line)
features = np.array(features)
print(features.shape)

# Load the pre-trained classifiers.
model_path_knn = os.path.join(PROJECT_DIR, 'dance-dance-software', 'models', 'kNN.pkl')
model_path_rf = os.path.join(PROJECT_DIR, 'dance-dance-software', 'models', 'randomForest.pkl')
model_path_svm = os.path.join(PROJECT_DIR, 'dance-dance-software', 'models', 'svm.pkl')

rf = joblib.load(model_path_rf)
knn = joblib.load(model_path_knn)
svm = joblib.load(model_path_svm)
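normalise is not defined in this excerpt. Given the symmetric bounds passed in (for example -2000 to 2000), a plausible min-max implementation that scales a reading into [0, 1] might look like the following; this is an assumption, not the project's actual helper.

def normalise(value, min_val, max_val):
    # Assumed implementation: clip to the expected range, then min-max scale
    # into [0, 1] so every channel feeds the models on the same scale.
    value = max(min_val, min(max_val, value))
    return (value - min_val) / (max_val - min_val)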
# Set up to traverse the CSV files and extract features.
for folder in os.listdir(data_dir):
    data_segments = []
    vibrated_folder = os.path.abspath(os.path.join(data_dir, folder))
    list_of_csv = os.listdir(vibrated_folder)
    for csv in list_of_csv:
        csv_path = os.path.join(vibrated_folder, csv)
        # Turn the entire CSV file into a list of segments (windows).
        data_segments = create_windows(csv_path)
        # For each segment, extract the features and label it with the folder name.
        for i in data_segments:
            features_csv = features_extraction.extract_features(i)
            label = str(folder)
            features_csv.append(label)
            main_df = main_df.append(pd.Series(features_csv, index=main_df.columns),
                                     ignore_index=True)

# Export to CSV for machine learning.
main_df.to_csv("./features.csv")

# ##################################################################
# For graph plotting and visualization purposes
# ##################################################################
# data = "D:\\Y4S1\\CS4276\\device-fingerprint\\data\\black_huawei\\gyro_100hz_14102019_143558.csv"
# data = "D:\\Y4S1\\CS4276\\device-fingerprint\\data\\htc_u11\\gyro_100hz_16102019_205643.csv"
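create_windows is called here with only a path, and in the training script further below with a window size and stride, so its exact signature varies between scripts. A hypothetical sliding-window reader (defaults, column handling, and return shape are all assumptions) could look like this:

import pandas as pd

def create_windows(csv_path, window_size=60, stride=30):
    # Assumed sketch: read the raw sensor CSV and cut it into overlapping
    # windows of `window_size` rows, advancing by `stride` rows each time.
    rows = pd.read_csv(csv_path).values.tolist()
    windows = []
    for start in range(0, len(rows) - window_size + 1, stride):
        windows.append(rows[start:start + window_size])
    return windows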
    def run(self):
        my_pi = RaspberryPi(ip_addr, port_num)
        # my_ML = ML()
        danceMove = ""
        power = ""
        voltage = ""
        current = ""
        cumpower = ""
        ml_data = []

        while True:
            queueLock.acquire()
            # Check whether the queue is empty; if it is, do not try to take from it.
            if not dataQueue.empty():
                packet_data = dataQueue.get()
                # print("data from queue: " + str(packet_data))  # check multithreading with this line
                power = packet_data["power"]
                voltage = packet_data["voltage"]
                current = packet_data["current"]
                cumpower = packet_data["cumpower"]
                ml_data.append(packet_data["01"] + packet_data["02"] + packet_data["03"])
            queueLock.release()

            # ML prediction once a full window of 60 samples has been collected.
            if len(ml_data) == 60:
                for arr in ml_data:
                    for i in range(len(arr)):
                        if i < 3:
                            arr[i] = normalise(arr[i], -2000, 2000)
                        elif i in range(3, 6):
                            arr[i] = normalise(arr[i], -250000, 250000)
                        elif i in range(6, 9):
                            arr[i] = normalise(arr[i], -2000, 2000)
                        elif i in range(9, 12):
                            arr[i] = normalise(arr[i], -250000, 250000)
                        elif i in range(12, 15):
                            arr[i] = normalise(arr[i], -2000, 2000)
                        elif i in range(15, 18):
                            arr[i] = normalise(arr[i], -250000, 250000)

                # Reorder the 18 channels into the layout expected by the Keras model.
                arr_data = []
                for array in ml_data:
                    arr_raw = []
                    arr_raw += [
                        array[0], array[1], array[2], array[6], array[7], array[8],
                        array[12], array[13], array[14], array[3], array[4], array[5],
                        array[9], array[10], array[11], array[15], array[16], array[17]
                    ]
                    arr_data.append(arr_raw)

                test_sample = np.array(arr_data)
                test_sample = test_sample.reshape(1, n_steps, n_length, n_features)
                with graph.as_default():
                    result_keras = model_keras.predict(test_sample, batch_size=96, verbose=0)

                data_line = extract_features(np.asarray(ml_data))
                result_int_keras = int(np.argmax(result_keras[0]))
                danceMove = labels_dict[result_int_keras]

                prediction_knn = model_knn.predict(data_line)
                prediction_rf = model_rf.predict(data_line)
                prediction_svm = model_svm.predict(data_line)

                # Majority vote across the four classifiers; only override the
                # Keras prediction when at least three of them agree.
                pred_list = [prediction_knn[0], prediction_rf[0], prediction_svm[0], danceMove]
                from collections import Counter  # ideally imported at module level
                most_common, num_most_common = Counter(pred_list).most_common(1)[0]
                if num_most_common >= 3:
                    danceMove = most_common

                data = Data(my_pi.sock)
                data.sendData(danceMove, power, voltage, current, cumpower)

            if len(ml_data) == 90:
                queueLock.acquire()
                dataQueue.queue.clear()
                if dataQueue.empty():
                    print("queue has been emptied for new window")
                ml_data = []
                queueLock.release()
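The reshape to (1, n_steps, n_length, n_features) only succeeds when n_steps * n_length equals the 60 samples in the window and n_features equals the 18 channels. The concrete values below are illustrative assumptions, not taken from the source:

import numpy as np

# Illustrative split: 60 samples as 4 sub-sequences of 15 readings each,
# with 18 sensor channels per reading; 1 * 4 * 15 * 18 == 60 * 18.
n_steps, n_length, n_features = 4, 15, 18
window = np.zeros((60, 18))  # stands in for arr_data
sample = window.reshape(1, n_steps, n_length, n_features)
print(sample.shape)  # (1, 4, 15, 18)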
def read_data():
    print('\nReading data from CSV file...')

    # Read the ground-truth list of malware hashes from 'sha256_family.csv'.
    malwares = pd.read_csv(os.path.join('drebin', 'sha256_family.csv'), dtype=str)
    print('Found (' + str(len(malwares.index)) + ') malwares in csv file.')

    print('Reading dataset files...')
    # Read all the files in the specified feature-vector path.
    data_path = os.path.join(os.getcwd(), 'drebin', 'feature_vectors')
    features_vector_path = data_path
    dataset_files = os.listdir(features_vector_path)
    dataset_files_length = len(dataset_files)
    print('Found (' + str(dataset_files_length) + ') files to classify.')

    # Separate malwares from non-malwares (building the ground-truth arrays).
    malware_files = []
    not_malware_files = []
    for file_name in dataset_files:
        if file_name in malwares.values[:, 0]:
            malware_files.append(file_name)
        else:
            not_malware_files.append(file_name)
    malware_files_length = len(malware_files)
    not_malware_files_length = len(not_malware_files)
    print('Found (' + str(malware_files_length) + ') malware files.')
    print('Found (' + str(not_malware_files_length) + ') safe files.')

    # Extract features from dataset files and label them:
    # 1 for malware, 0 otherwise; x = {set of features}, y = {0|1}.
    x = []
    y = []

    # Extract feature occurrences in malware files.
    for malware_file in malware_files:
        with open(data_path + '/' + malware_file, 'r') as file:
            file_content = file.read().splitlines()
            sample = features_extraction.extract_features(file_content)
            x.append(sample)
            y.append(1)

    # Extract feature occurrences in safe (non-malware) files.
    counter = 1  # remove this to work with the unbalanced dataset
    for non_malware_file in not_malware_files:
        # Remove the following lines to work with the unbalanced dataset.
        counter += 1
        if counter == malware_files_length:
            break
        else:
            # Remove lines up to here.
            with open(data_path + '/' + non_malware_file, 'r') as file:
                file_content = file.read().splitlines()
                sample = features_extraction.extract_features(file_content)
                x.append(sample)
                y.append(0)

    x = np.array(x)
    y = np.array(y)
    print("\nFeatures & Labels arrays' shapes, respectively: " + str(x.shape), str(y.shape))
    return x, y
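read_data only builds the feature and label arrays. One plausible downstream step is splitting the data and fitting a scikit-learn classifier; the model choice and parameters below are assumptions, not the author's pipeline.

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

x, y = read_data()
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

clf = RandomForestClassifier(n_estimators=100, random_state=42)  # assumed model choice
clf.fit(x_train, y_train)
print('Accuracy:', accuracy_score(y_test, clf.predict(x_test)))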
for j in range(len(labels_dict)):
    for i in range(5):
        i_str = str(i + 1)
        if len(i_str) == 1:
            i_str = '0' + i_str
        if i_str == '28' and labels[j] in unavailable_labels:
            continue
        readings, label = create_windows(
            os.path.join(data_processed_path, labels[j] + i_str + ".csv"), 60, 30)
        data_for_extraction.extend(readings)
        labels_for_extraction.extend(label)

# Build one feature row per window; the last column is the dance label (1-based).
features = []
for i in range(len(data_for_extraction)):
    data_line = extract_features(np.asarray(data_for_extraction[i]))
    data_line.append(labels_for_extraction[i] + 1)
    features.append(data_line)

data_csv_filename = os.path.join(data_processed_path, 'dataFeatures.csv')
labels_csv_filename = os.path.join(data_processed_path, 'labelFeatures.csv')

# 72 feature columns ("val1" .. "val72") plus the "dance" label column.
features_df = pd.DataFrame(features)
features_df.to_csv(data_csv_filename,
                   header=["val" + str(n) for n in range(1, 73)] + ["dance"],
                   index=None, sep=',')
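The header written above has 72 "val" columns plus the dance label, i.e. 72 features per window. One common way to obtain 72 features from an 18-channel window is four summary statistics per channel; the sketch below is a hypothetical illustration of that layout, not the project's actual extract_features.

import numpy as np

def extract_features(window):
    # Assumed sketch. window: ndarray of shape (n_samples, 18); four statistics
    # per channel gives 18 * 4 = 72 values, matching the "val" columns above.
    features = []
    for col in range(window.shape[1]):
        channel = window[:, col]
        features.extend([channel.mean(), channel.std(), channel.min(), channel.max()])
    return features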