import os
import random

import pandas as pd

import utils_wf


def get_ow_train_test(path, outpath_train, outpath_test):
    """
    extract opened world instances,  random take 20,000  training set
    the rest of ow instances as ow testing data
    output csv file respectively
    :return:
    """

    "get ow instnace"
    x, y = utils_wf.get_ow_data(path)
    x = pd.DataFrame(x)
    candit_list = list(range(0, len(y)))
    rand_list = random.sample(candit_list, 20000)
    X_train, X_test, y_train, y_test = [], [], [], []
    for i in range(len(y)):
        if i in rand_ids:
            X_train.append(x.iloc[i, :])
            y_train.append(y[i])
        else:
            X_test.append(x.iloc[i, :])
            y_test.append(y[i])

    train_ow = utils_wf.convert2dataframe(X_train, y_train)
    test_ow = utils_wf.convert2dataframe(X_test, y_test)

    utils_wf.write2csv(train_ow, outpath_train)
    utils_wf.write2csv(test_ow, outpath_test)
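

def _demo_get_ow_train_test():
    # Minimal usage sketch; the open-world CSV path and both output paths
    # are hypothetical placeholders.
    get_ow_train_test(path='../data/wf_ow/ow_all.csv',
                      outpath_train='../data/wf_ow/ow_train.csv',
                      outpath_test='../data/wf_ow/ow_test.csv')

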
def main2csv():
    """Convert .burst files into one CSV file per split."""
    data_type = ['test', 'train']
    for split in data_type:
        path = '../data/WalkieTalkie/defended_batch/%s/' % split
        out_path = '../data/WalkieTalkie/defended_csv/adv_%s_WT.csv' % split
        items = os.listdir(path)
        data_list = []
        labels = []
        for item in items:
            if item.endswith('.burst'):
                label = int(item.split('-')[0])
                data = load_burst_file(path + item)  # helper defined elsewhere in this module
                data_list.append(data)
                labels.append(label)

        # tt = pd.DataFrame(data_list)
        # tt['label'] = labels
        # tt.to_csv('../data/WalkieTalkie/defended_csv/orig_%s.csv' % type,index=0)

        "binary to burst"
        data_burst, data_burst_noSlice = utils_wf.burst_transform(
            data_list, slice_threshold=512)
        data_df = utils_wf.convert2dataframe(data_burst_noSlice,
                                             labels,
                                             mode='padding')
        utils_wf.write2csv(data_df, out_path)
        print('{} ... saved successfully.'.format(out_path))
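

def _demo_main2csv():
    # Usage sketch: main2csv expects .burst files named '<label>-<id>.burst'
    # under ../data/WalkieTalkie/defended_batch/{test,train}/ (the label is
    # parsed from the part before the first '-').
    main2csv()

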
def merge_data(slice_threshold):
    """
    Merge the train/test/valid data into one CSV file of processed source
    data (traces with fewer than 50 packets or starting with an incoming
    packet are removed). Output is in burst format.
    """

    data_folder = '../data/wf_ow/'
    out_folder = data_folder + 'input_size_' + str(slice_threshold) + '/'  # data_folder already ends with '/'
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    X_path = [
        data_folder + 'X_train_NoDef.pkl',
        data_folder + 'X_test_Unmon_NoDef.pkl',
        data_folder + 'X_test_Mon_NoDef.pkl', data_folder + 'X_valid_NoDef.pkl'
    ]
    Y_path = [
        data_folder + 'y_train_NoDef.pkl',
        data_folder + 'y_test_Unmon_NoDef.pkl',
        data_folder + 'y_test_Mon_NoDef.pkl', data_folder + 'y_valid_NoDef.pkl'
    ]
    out_path = out_folder + 'data_NoDef_processed.csv'

    X, Y = [], []
    for x_path, y_path in zip(X_path, Y_path):
        X += utils_wf.load_pkl_data(x_path)
        Y += utils_wf.load_pkl_data(y_path)
    print('data instances after merging: {}'.format(len(Y)))

    "remove less than 50 packets and starting with incoming packet"
    X_new, Y_new = utils_wf.data_preprocess(X, Y)
    print('data instances after processed: {}'.format(len(Y_new)))

    "convert to burst"
    x_burst, _ = utils_wf.burst_transform(X_new, slice_threshold)
    data_new = utils_wf.convert2dataframe(x_burst, Y_new)
    utils_wf.write2csv(data_new, out_path)
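

def _demo_merge_data():
    # Usage sketch: expects the X_*/y_* NoDef pickles listed above under
    # ../data/wf_ow/; the threshold of 512 is an assumption, matching the
    # value used elsewhere in this file.
    merge_data(slice_threshold=512)

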
def tranform2burst(path, out_path, slice_threshold):
    """Load processed source data from CSV, transform it into bursts of a
    fixed size, and write the result to CSV."""

    x, y = utils_wf.load_csv_data(path)
    x_burst, _ = utils_wf.burst_transform(x, slice_threshold)
    burst_data = utils_wf.convert2dataframe(x_burst, y, mode='padding')
    utils_wf.write2csv(burst_data, out_path)
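

def _demo_tranform2burst():
    # Usage sketch; the input path matches merge_data's output above, while
    # the output file name and the threshold of 512 are assumptions.
    tranform2burst(path='../data/wf_ow/input_size_512/data_NoDef_processed.csv',
                   out_path='../data/wf_ow/input_size_512/data_NoDef_burst.csv',
                   slice_threshold=512)

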
def extract_balance_data(path, num, outpath, data_type):
    """Extract num instances for each class and write them to CSV."""

    if data_type == 'cw':  # closed-world
        X, Y = utils_wf.extract_data_each_class(path, num)
    elif data_type == 'ow':  # open-world
        X, Y = utils_wf.extract_data_each_class_ow(path, num)
    else:
        raise ValueError("data_type must be 'cw' or 'ow'")
    data = utils_wf.convert2dataframe(X, Y, mode='NoPadding')
    utils_wf.write2csv(data, outpath)
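

def _demo_extract_balance_data():
    # Usage sketch: take 100 instances per class from a closed-world CSV.
    # Both paths and the per-class count are hypothetical.
    extract_balance_data(path='../data/wf_ow/train_NoDef.csv',
                         num=100,
                         outpath='../data/wf_ow/train_NoDef_balanced.csv',
                         data_type='cw')

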
def build_ow_train(path1, path2, outpath):
    """
    Concatenate two datasets into one training set.
    :param path1: open-world training data
    :param path2: closed-world training data
    :return:
    """

    X_1, y_1 = utils_wf.load_csv_data(path1)
    X_2, y_2 = utils_wf.load_csv_data(path2)

    train_1 = utils_wf.convert2dataframe(X_1, y_1)
    train_2 = utils_wf.convert2dataframe(X_2, y_2)

    merge = [train_1, train_2]
    train_merge = pd.concat(merge)

    utils_wf.write2csv(train_merge, outpath)
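

def _demo_build_ow_train():
    # Usage sketch: append closed-world training data to the open-world
    # training split; all three paths are hypothetical.
    build_ow_train(path1='../data/wf_ow/ow_train.csv',
                   path2='../data/wf_ow/cw_train.csv',
                   outpath='../data/wf_ow/ow_cw_train.csv')

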
def gen_split_file(split_id, X, y, Adversary):
    """
    Save each train/test split to a CSV file.
    :param split_id: iterable of (train_index, test_index) pairs
    :param X: feature data to be split
    :param y: labels to be split
    :param Adversary: tag used by get_output_name to name the output files
    :return:
    """
    for i, (train_id, test_id) in enumerate(split_id):
        print('*' * 30)
        print('split %d' % i)
        print('train_id', len(train_id))
        print('test_id', len(test_id))
        df_train = utils_wf.convert2dataframe(X.iloc[train_id],
                                              y.iloc[train_id])
        df_test = utils_wf.convert2dataframe(X.iloc[test_id], y.iloc[test_id])
        utils_wf.write2csv(df_train, get_output_name('train', i, Adversary))
        utils_wf.write2csv(df_test, get_output_name('test', i, Adversary))
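

def _demo_gen_split_file():
    # Minimal sketch of how split_id is expected to look: a sequence of
    # (train_index, test_index) pairs, e.g. produced by sklearn's KFold.
    # The CSV path and the 'NoDef' adversary tag are hypothetical.
    from sklearn.model_selection import KFold

    X, y = utils_wf.load_csv_data('../data/wf_ow/ow_train.csv')
    X, y = pd.DataFrame(X), pd.Series(y)  # gen_split_file indexes with .iloc
    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    gen_split_file(list(kf.split(X)), X, y, Adversary='NoDef')

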
def pkl2burst_csv(slice_threshold):
    """
    Load a pkl file, transform the data to bursts, and write it to CSV.
    No preprocessing here: keep the original traces; do not remove instances
    with fewer than 50 packets or starting with an incoming packet.
    """

    split = 'valid'  # ['test', 'train', 'valid']
    folder_name = 'wf_ow'
    data_name = 'NoDef'  # ['NoDef', 'WalkieTalkie']

    # x_path = '../data/' + folder_name + '/X_' + split + '_' + data_name + '.pkl'
    # out_path = '../data/' + folder_name + '/' + split + '_' + data_name + '.csv'
    # y_path = '../data/' + folder_name + '/y_' + split + '_' + data_name + '.pkl'

    # x_path = '../data/NoDef/X_valid_NoDef.pkl'
    # out_path = '../data/NoDef/valid_NoDef.csv'
    # y_path = '../data/NoDef/y_valid_NoDef.pkl'

    # x_path = '../data/NoDef/X_train_NoDef.pkl'
    # out_path = '../data/NoDef/train_NoDef.csv'
    # y_path = '../data/NoDef/y_train_NoDef.pkl'

    "processing pkl file"
    # x = utils_wf.load_pkl_data(x_path)
    # y = utils_wf.load_pkl_data(y_path)

    "processing wang data csv file"
    file_id = 'wang_UnMon'
    x_path = '../data/wf_wang/' + file_id + '.csv'
    out_path = '../data/wf_wang/' + file_id + '_burst.csv'
    x, y = utils_wf.load_csv_data(x_path)

    # X_new, y_new = utils_wf.data_preprocess(x, y) #remove trace less than 50 packets and starting with incoming packet
    x_burst, x_burst_noSlicing = utils_wf.burst_transform(x, slice_threshold)

    # utils_wf.size_distribution(x_burst_noSlicing,file_id)

    burst_data = utils_wf.convert2dataframe(x_burst, y)
    utils_wf.write2csv(burst_data, out_path)
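

def _demo_pkl2burst_csv():
    # Usage sketch: as configured above, this reads
    # ../data/wf_wang/wang_UnMon.csv; the threshold of 512 is an assumption.
    pkl2burst_csv(slice_threshold=512)

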
    # Fragment: this method belongs to a class whose __init__ is assumed to
    # set self.data_path and self.opts['output_file'].
    def write2csv(self, X, Y):
        """Convert X and Y to a dataframe and write it to the output CSV."""
        df = utils_wf.convert2dataframe(X, Y, mode='NoPadding')
        df.to_csv(self.data_path + '/' + self.opts['output_file'], index=False)
        print('file has been processed successfully!')