def load_sequence_data_by_tshark(first_n_pkts_input_file, separator=','):
    """Load two-feature rows and labels from a tshark all-in-one export.

    Each valid line must have at least 9 whitespace-separated fields
    (No, time, srcIP, dstIP, protocol, pkts_size, srcPort, dstPort, label
    — per the original column comment); the last three fields are used as
    (feature_1, feature_2, label). Shorter lines are logged and skipped.

    :param first_n_pkts_input_file: path to the export, e.g.
        '../results/AUDIO_first_n_pkts_10_all_in_one_file.txt'
    :param separator: unused — lines are split on whitespace; kept only
        for interface compatibility with the other loaders in this file.
    :return: (X, Y) where X is a list of length-1 sequences, each holding
        one normalized feature row, and Y = change_label(labels).
    """
    data = []
    label = []
    with open(first_n_pkts_input_file, 'r') as fid_in:
        for line in fid_in:
            line_arr = line.split()
            if len(line_arr) < 9:
                # rstrip('\n') instead of line[:-2]: the old slice also
                # dropped the last real character of the logged line.
                print('skip: ', line.rstrip('\n'))
                continue
            data.append([line_arr[-3], line_arr[-2]])
            # str.split() already discards the trailing newline, so the
            # last field is the clean label.
            label.append(line_arr[-1])

    Y = change_label(label)
    new_data = normalize_data(np.asarray(data, dtype=float),
                              range_value=[0, 1], eps=1e-5)
    # Wrap each normalized row in a singleton list so downstream sequence
    # models see (n_samples, seq_len=1, n_features).
    X = [[row] for row in new_data]
    return X, Y
def load_sequence_data_backup(first_n_pkts_input_file, separator=','):
    """Load per-flow packet sequences from a separator-delimited file.

    Line format: srcIP, dstIP, srcport, dstport, len(pkts), pkts_lst...,
    flow_duration, intr_time_lst..., label.

    NOTE(review): `len_tmp` retains the value parsed from the LAST line
    and is applied to every row below, so all flows are assumed to carry
    the same packet count; an empty input file raises NameError. Both
    quirks are preserved deliberately (behavior-identical rewrite).

    :param first_n_pkts_input_file: e.g.
        '../results/AUDIO_first_n_pkts_10_all_in_one_file.txt'
    :param separator: field delimiter, default ','.
    :return: (X, Y) — X is a list of per-flow step sequences, each step a
        6-feature list; Y = change_label(labels).
    """
    data, label = [], []
    with open(first_n_pkts_input_file, 'r') as fid_in:
        for line in fid_in:
            fields = line.split(separator)
            len_tmp = int(fields[4])  # packet count declared on this line
            data.append(fields[:-1])
            label.append(fields[-1].split('\n')[0])

    Y = change_label(label)
    new_data = normalize_data(np.asarray(data, dtype=float),
                              range_value=[0, 1], eps=1e-5)

    X = []
    for row in new_data:
        steps = []
        for i in range(1, len_tmp + 1):
            # Step 1 pairs pkts_lst[0] with flow_duration (offset len_tmp+1);
            # later steps pair pkts_lst[i-1] with the inter-arrival list,
            # which sits one slot further down (offset len_tmp+i+1).
            time_idx = 4 + len_tmp + (i if i == 1 else i + 1)
            steps.append([row[0], row[1], row[2], row[3],
                          row[4 + i], row[time_idx]])
        X.append(steps)
    return X, Y
# NOTE(review): whitespace-mangled fragment — the first three statements
# appear to be the tail of a one-hot encoding helper (presumably
# one_hot_sklearn, called below; its `def` header lies outside this chunk),
# fused onto the same physical line as the `__main__` driver script.
# Code left byte-identical; reformat once the complete file is visible.
integer_encoded = label_integer.reshape(len(label_integer), 1) onehot_encoded = onehot_encoder.fit_transform(integer_encoded) return np.array(onehot_encoded, dtype=int) if __name__ == '__main__': torch.manual_seed(1) # reproducible input_file = '../results/AUDIO_first_n_pkts_10_all_in_one_file.txt' # X, Y = load_data(input_file) X, Y = load_data_compute_mean(input_file) X = normalize_data(np.asarray(X, dtype=float), range_value=[0, 1], eps=1e-5) Y = change_label(Y) X_train, X_test, y_train, y_test = achieve_train_test_data(X, Y, train_size=0.9, shuffle=True) ann = MLP(BATCH_SIZE=20, first_n_pkts=10, epochs=10, num_class=len(Counter(y_train))) # training_set = Data.TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train)) # X, Y one_hot_y_train = one_hot_sklearn(y_train) training_set = (X_train, y_train) ann.train(training_set) # show_figure(ann.train_hist['loss'])