Example #1
app.layout = html.Div(
    children=[
        html.Div(children=[
            html.H1(
                'Feature Extraction',  # hypothetical title; the original snippet is truncated here
                style={'textAlign': 'left'}),
            html.P(
                """This application extracts text from three communication
                channels of one user, P-Allen, then applies a statistical
                modeling technique to find interesting features. The table
                below shows the result: the 15 most important features for
                each channel."""
            )
        ]
        ),

        html.Div(children=[
            html.P('Select a channel - sms: 1, emails: 2, chats: 3'),
            html.Div(
                [
                    dcc.Dropdown(id='dropdown', options=[
                        {'label': i, 'value': i} for i in extract_features().channel_code.unique()
                    ], multi=True, placeholder='Filter by channel...'),

                    html.Div(id='output_div')
                ], className="row"),
            html.Div(
                [
                    dash_table.DataTable(id='table', columns=[])
                ], className="ten columns"),

            html.Div(
                [
                    html.P('Behavox assignment - Developed by Mouhameth T. Faye',
                           style={'display': 'inline'}),
                    html.A('*****@*****.**', href='mailto:[email protected]')
                ], className="twelve columns",
                # Dash style keys are camelCase: paddingTop, not padding-top
                style={'fontSize': 14, 'paddingTop': 18})
        ])
    ])
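
For context, a minimal sketch of how a layout like this is typically served; the app name and entry point are the standard Dash pattern, not taken from the original project:

import dash
import dash_html_components as html
import dash_core_components as dcc
import dash_table

app = dash.Dash(__name__)
# app.layout = html.Div(...)  # the layout fragment above goes here

if __name__ == '__main__':
    app.run_server(debug=True)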
Example #2
def display_table(selector):
    # With nothing selected, show the full feature table
    if selector is None:
        return generate_table(extract_features())

    # Keep only rows whose channel_code matches any selected channel
    df = extract_features()
    dff = df[df.channel_code.str.contains('|'.join(selector))]
    return generate_table(dff)
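
display_table is presumably registered as a Dash callback against the dropdown and output_div from Example #1, and generate_table is not shown. A minimal sketch of how both might look; the wiring and the helper are assumptions, not the original code:

from dash.dependencies import Input, Output

@app.callback(Output('output_div', 'children'),
              [Input('dropdown', 'value')])
def display_table(selector):
    ...  # body as above

def generate_table(df, max_rows=20):
    # Render a DataFrame as a plain Dash HTML table (hypothetical helper)
    return html.Table(
        [html.Tr([html.Th(col) for col in df.columns])] +
        [html.Tr([html.Td(df.iloc[i][col]) for col in df.columns])
         for i in range(min(len(df), max_rows))])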
Example #3
    # Normalise each reading channel-by-channel (the loop head and first
    # branches are assumed; they match the identical chain in Example #5)
    for arr in data_all:
        for i in range(len(arr)):
            if i < 3:
                arr[i] = normalise(arr[i], -2000, 2000)
            elif i in range(3, 6):
                arr[i] = normalise(arr[i], -250000, 250000)
            elif i in range(6, 9):
                arr[i] = normalise(arr[i], -2000, 2000)
            elif i in range(9, 12):
                arr[i] = normalise(arr[i], -250000, 250000)
            elif i in range(12, 15):
                arr[i] = normalise(arr[i], -2000, 2000)
            elif i in range(15, 18):
                arr[i] = normalise(arr[i], -250000, 250000)
    # Keep the first 60 readings as one test window
    data_to_test = data_all[:60]
    print(len(data_to_test[0]))

    # A single feature row extracted from the 60-reading window
    features = []
    data_line = extract_features(np.asarray(data_to_test))
    features.append(data_line)

    features = np.array(features)
    print(features.shape)

    model_path_knn = os.path.join(PROJECT_DIR, 'dance-dance-software',
                                  'models', 'kNN.pkl')
    model_path_rf = os.path.join(PROJECT_DIR, 'dance-dance-software', 'models',
                                 'randomForest.pkl')
    model_path_svm = os.path.join(PROJECT_DIR, 'dance-dance-software',
                                  'models', 'svm.pkl')
    rf = joblib.load(model_path_rf)
    knn = joblib.load(model_path_knn)
    svm = joblib.load(model_path_svm)
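
The normalise helper used here (and in Example #5) is not shown. A minimal sketch, assuming plain min-max scaling of a reading into [0, 1] over a fixed sensor range:

def normalise(value, range_min, range_max):
    # Min-max scale a single reading into [0, 1]
    return (value - range_min) / (range_max - range_min)

# e.g. normalise(1000, -2000, 2000) == 0.75

The loaded models can then be queried the same way Example #5 does, e.g. rf.predict(features).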
Example #4
# Setting up to traverse through CSV files to extract features
for folder in os.listdir(data_dir):
    vibrated_folder = os.path.abspath(os.path.join(data_dir, folder))
    list_of_csv = os.listdir(vibrated_folder)
    for csv in list_of_csv:
        csv_path = os.path.join(vibrated_folder, csv)

        # Split the whole CSV into a list of fixed-size segments (windows)
        data_segments = create_windows(csv_path)

        # Extract one feature row per segment and label it with its folder name
        for segment in data_segments:
            features_csv = features_extraction.extract_features(segment)
            features_csv.append(str(folder))
            # DataFrame.append was removed in pandas 2.0; append by position instead
            main_df.loc[len(main_df)] = features_csv

# Export to CSV for machine learning
main_df.to_csv("./features.csv")

# ##################################################################
# # For graph plotting and visualization purposes
# ##################################################################
#
# data = "D:\\Y4S1\\CS4276\\device-fingerprint\\data\\black_huawei\\gyro_100hz_14102019_143558.csv"
# # data = "D:\\Y4S1\\CS4276\\device-fingerprint\\data\\htc_u11\\gyro_100hz_16102019_205643.csv"
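
The create_windows helper is not shown. A minimal sketch, assuming it reads a sensor CSV and slices the rows into fixed-size overlapping windows; the window and step sizes are hypothetical defaults (Example #7 calls a variant that also returns labels):

import pandas as pd

def create_windows(csv_path, window_size=60, step=30):
    # Return a list of row-windows of length window_size, advancing by step
    rows = pd.read_csv(csv_path).values.tolist()
    return [rows[start:start + window_size]
            for start in range(0, len(rows) - window_size + 1, step)]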
Example #5
    def run(self):
        my_pi = RaspberryPi(ip_addr, port_num)
        #my_ML = ML()
        danceMove = ""
        power = ""
        voltage = ""
        current = ""
        cumpower = ""
        ml_data = []
        while True:
            queueLock.acquire()
            # Only take from the queue when it has data
            if not dataQueue.empty():
                packet_data = dataQueue.get()
                #print("data from queue: " + str(packet_data)) #check for multithreading using this line
                power = packet_data["power"]
                voltage = packet_data["voltage"]
                current = packet_data["current"]
                cumpower = packet_data["cumpower"]
                # Concatenate the three sensor packets into one reading
                ml_data.append(packet_data["01"] + packet_data["02"] +
                               packet_data["03"])
            queueLock.release()
            # Run the ensemble prediction once a full 60-reading window is ready
            if len(ml_data) == 60:
                # Channels alternate in groups of three: a ±2000 range, then a
                # ±250000 range (assumes the 18-value readings used here)
                for arr in ml_data:
                    for i in range(len(arr)):
                        if i % 6 < 3:
                            arr[i] = normalise(arr[i], -2000, 2000)
                        else:
                            arr[i] = normalise(arr[i], -250000, 250000)
                # Reorder each reading: the three ±2000 triples first, then
                # the three ±250000 triples
                order = [0, 1, 2, 6, 7, 8, 12, 13, 14,
                         3, 4, 5, 9, 10, 11, 15, 16, 17]
                arr_data = [[array[k] for k in order] for array in ml_data]

                test_sample = np.array(arr_data).reshape(1, n_steps, n_length,
                                                         n_features)
                with graph.as_default():
                    result_keras = model_keras.predict(test_sample,
                                                       batch_size=96,
                                                       verbose=0)
                data_line = extract_features(np.asarray(ml_data))
                result_int_keras = int(np.argmax(result_keras[0]))
                danceMove = labels_dict[result_int_keras]

                prediction_knn = model_knn.predict(data_line)
                prediction_rf = model_rf.predict(data_line)
                prediction_svm = model_svm.predict(data_line)

                # Majority vote: override the Keras prediction only when at
                # least three of the four models agree
                from collections import Counter

                pred_list = [prediction_knn[0], prediction_rf[0],
                             prediction_svm[0], danceMove]
                most_common, num_most_common = Counter(pred_list).most_common(1)[0]
                if num_most_common >= 3:
                    danceMove = most_common

                data = Data(my_pi.sock)
                data.sendData(danceMove, power, voltage, current, cumpower)

            # After 90 readings, flush the queue and start a new window
            if len(ml_data) == 90:
                queueLock.acquire()
                dataQueue.queue.clear()
                if dataQueue.empty():
                    print("queue has been emptied for new window")
                ml_data = []
                queueLock.release()
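
The voting step above only overrides the Keras prediction on a clear majority. A tiny standalone illustration of that Counter logic, with hypothetical move names:

from collections import Counter

preds = ['wipers', 'wipers', 'number7', 'wipers']
most_common, num_most_common = Counter(preds).most_common(1)[0]
print(most_common, num_most_common)  # wipers 3 -> three of four agree, so override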
Example #6
def read_data():

    print('\nReading data from CSV file...')

    # Read the ground-truth malware list from 'drebin/sha256_family.csv'
    malwares = pd.read_csv(os.path.join('drebin', 'sha256_family.csv'), dtype=str)

    print(f'Found {len(malwares.index)} malware entries in the CSV file.')

    print('Reading dataset files...')

    # List all files in the feature-vector directory
    data_path = os.path.join(os.getcwd(), 'drebin', 'feature_vectors')
    dataset_files = os.listdir(data_path)

    dataset_files_length = len(dataset_files)
    print(f'Found {dataset_files_length} files to classify.')

    # Separate malwares from non-malwares [Building ground truth arrays]
    malware_files = []
    not_malware_files = []
    malware_hashes = set(malwares.values[:, 0])  # set membership is O(1) per file
    for file_name in dataset_files:
        if file_name in malware_hashes:
            malware_files.append(file_name)
        else:
            not_malware_files.append(file_name)

    malware_files_length = len(malware_files)
    not_malware_files_length = len(not_malware_files)
    print(f'Found {malware_files_length} malware files.')
    print(f'Found {not_malware_files_length} safe files.')

    # Extract features from dataset files and label them:
    # 1 for malware, 0 otherwise
    # x = {set of features}, y = {0|1}
    x = []
    y = []

    # Extract feature occurrences in malware files
    for malware_file in malware_files:
        with open(os.path.join(data_path, malware_file), 'r') as file:
            file_content = file.read().splitlines()
            sample = features_extraction.extract_features(file_content)
            x.append(sample)
            y.append(1)

    # Extract feature occurrences in safe (non-malware) files; truncate the
    # list to one safe file per malware file so the classes stay balanced
    # (use the full list instead to work with the unbalanced dataset)
    for non_malware_file in not_malware_files[:malware_files_length]:
        with open(os.path.join(data_path, non_malware_file), 'r') as file:
            file_content = file.read().splitlines()
            sample = features_extraction.extract_features(file_content)
            x.append(sample)
            y.append(0)

    x = np.array(x)
    y = np.array(y)
    print("\nFeatures & Labels arrays' shapes, respectively: " +
          str(x.shape), str(y.shape))
    return x, y
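
The x and y arrays returned here plug straight into scikit-learn. A minimal sketch of one way to train and evaluate on them; the split ratio and classifier are illustrative, not from the original project:

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

x, y = read_data()
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    random_state=42)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(x_train, y_train)
print('Test accuracy:', clf.score(x_test, y_test))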
Example #7
for j in range(len(labels_dict)):
    for i in range(5):
        i_str = str(i + 1).zfill(2)  # zero-pad to two digits, e.g. '01'

        # NOTE: with range(5) above, i_str never reaches '28'; this guard only
        # matters if the per-label file count is raised
        if i_str == '28' and labels[j] in unavailable_labels:
            continue
        readings, label = create_windows(os.path.join(data_processed_path, (labels[j] + i_str + ".csv")), 60, 30)
        data_for_extraction.extend(readings)
        labels_for_extraction.extend(label)

features = []
for reading, label in zip(data_for_extraction, labels_for_extraction):
    data_line = extract_features(np.asarray(reading))
    data_line.append(label + 1)  # store labels 1-based
    features.append(data_line)


data_csv_filename = os.path.join(data_processed_path, 'dataFeatures.csv')
labels_csv_filename = os.path.join(data_processed_path, 'labelFeatures.csv')

features_df = pd.DataFrame(features)
features_df.to_csv(data_csv_filename,
                   header=[f"val{i}" for i in range(1, 73)] + ["dance"],
                   index=None, sep=',')