Example #1
0
def load_sequence_data_by_tshark(first_n_pkts_input_file, separator=','):
    """Load two features and a label per line from a whitespace-separated dump.

    Every line is split on whitespace. Lines with fewer than 9 fields are
    printed with a ``skip:`` prefix and ignored. For each remaining line the
    third-to-last and second-to-last fields become the feature pair and the
    last field becomes the label.

    :param first_n_pkts_input_file: path of the text file to read, e.g.
        '../results/AUDIO_first_n_pkts_10_all_in_one_file.txt'
    :param separator: not used by this loader (fields are always split on
        whitespace); kept so callers that pass it keep working.
    :return: ``(X, Y)``. ``X`` is a list holding, for every kept line, a
        one-element list wrapping that line's row of the normalized feature
        array. ``Y`` is the result of ``change_label`` on the raw labels.
    """
    data = []
    label = []
    with open(first_n_pkts_input_file, 'r') as fid_in:
        for line in fid_in:
            # Column layout noted by the original author:
            # No,  time,  srcIP  dstIP,  protocol, pkts_size, srcPort, dstPort
            line_arr = line.split()
            if len(line_arr) < 9:
                # Strip only the trailing newline so the reported line is not
                # truncated by one real character.
                print('skip: ', line.rstrip('\n'))
                continue
            data.append([line_arr[-3], line_arr[-2]])
            # Whitespace splitting already removed the newline from the last
            # field, so it can be used as the label directly.
            label.append(line_arr[-1])

    Y = change_label(label)
    new_data = normalize_data(np.asarray(data, dtype=float),
                              range_value=[0, 1],
                              eps=1e-5)

    # Each sample is wrapped in its own list, giving one feature row per
    # sample (presumably a length-1 sequence for the consumer; confirm).
    X = [[row] for row in new_data]

    return X, Y
Example #2
0
def load_sequence_data_backup(first_n_pkts_input_file, separator=','):
    """Load flow records and turn each one into a per-packet feature sequence.

    Expected line layout (per the original author's note)::

        srcIP, dstIP, srcport, dstport, len(pkts), pkts_lst, flow_duration,
        intr_time_lst, label

    All fields except the last are converted to floats and normalized
    together; the last field is the label.

    :param first_n_pkts_input_file: path of the text file to read, e.g.
        '../results/AUDIO_first_n_pkts_10_all_in_one_file.txt'
    :param separator: field delimiter used to split each line.
    :return: ``(X, Y)``. ``X`` has one entry per line; each entry is a list
        of ``len(pkts)`` six-element steps made of the first four columns,
        one packet column and one timing column. ``Y`` is the result of
        ``change_label`` on the raw labels.
    """
    data = []
    label = []
    # Packet count of every row, captured before normalization rescales
    # column 4 and makes it unusable as an index bound.
    pkt_counts = []
    with open(first_n_pkts_input_file, 'r') as fid_in:
        for line in fid_in:
            line_arr = line.split(separator)
            pkt_counts.append(int(line_arr[4]))  # length of pkts_list
            data.append(line_arr[:-1])
            label.append(line_arr[-1].rstrip('\n'))

    Y = change_label(label)
    new_data = normalize_data(np.asarray(data, dtype=float),
                              range_value=[0, 1],
                              eps=1e-5)

    X = []
    for line_arr, n_pkts in zip(new_data, pkt_counts):
        line_tmp = []
        for i in range(1, n_pkts + 1):  # one step per packet, i in [1, n_pkts]
            # Step 1 pairs the first packet column with the column right
            # after the packet list (flow_duration in the layout above).
            # Later steps skip one extra column, so step i pairs packet i
            # with the inter-arrival entries starting from index 1.
            time_idx = 4 + n_pkts + (i if i == 1 else i + 1)
            line_tmp.append([
                line_arr[0], line_arr[1], line_arr[2], line_arr[3],
                line_arr[4 + i], line_arr[time_idx]
            ])
        X.append(line_tmp)

    return X, Y
    integer_encoded = label_integer.reshape(len(label_integer), 1)
    onehot_encoded = onehot_encoder.fit_transform(integer_encoded)

    return np.array(onehot_encoded, dtype=int)


if __name__ == '__main__':
    # Seed torch's RNG so repeated runs start from the same state.
    torch.manual_seed(1)

    input_file = '../results/AUDIO_first_n_pkts_10_all_in_one_file.txt'

    # Load features and labels, then scale the features and convert the
    # labels with the project helpers.
    X, Y = load_data_compute_mean(input_file)
    X = normalize_data(
        np.asarray(X, dtype=float),
        range_value=[0, 1],
        eps=1e-5,
    )
    Y = change_label(Y)

    # Shuffled split with train_size=0.9.
    X_train, X_test, y_train, y_test = achieve_train_test_data(
        X,
        Y,
        train_size=0.9,
        shuffle=True,
    )

    # The number of classes is the count of distinct training labels.
    ann = MLP(
        BATCH_SIZE=20,
        first_n_pkts=10,
        epochs=10,
        num_class=len(Counter(y_train)),
    )

    # NOTE(review): the one-hot labels are computed but not handed to
    # train(); the training set below carries y_train as-is. Confirm which
    # form MLP.train expects before removing this call.
    one_hot_y_train = one_hot_sklearn(y_train)
    training_set = (X_train, y_train)
    ann.train(training_set)

    # show_figure(ann.train_hist['loss'])