Example #1
def main_MAWI():
    """Optionally compute per-IP packet counts for the MAWI/WIDE_2020 capture,
    then extract one filtered pcap per IP of interest via filter_ip."""
    is_stat = False
    if is_stat:
        import os
        # file_name = 'samplepoint-F_202007011400-src_dst_185.8.54.240.pcap'
        file_name = 'samplepoint-F_202007011400.pcap'
        pcap_file = get_file_path(ipt_dir='original_data/reprst',
                                  dataset_name='MAWI/WIDE_2020',
                                  file_name=file_name)
        stat_file = pcap_file + '-stat.dat'
        print(stat_file)
        if not os.path.exists(stat_file):
            stat = get_stat(pcap_file, out_file=stat_file)
        else:
            stat = load_data(stat_file)

        stat = dict(sorted(stat.items(), key=lambda kv: kv[1], reverse=True))
        for i, (k, v) in enumerate(stat.items()):
            if v > 1000:
                print(f'i={i}, {k}: {v}\n')
        # print(stat)

    ips = [
        '23.222.78.164', '203.78.4.32', '203.78.8.151', '23.223.19.175',
        '114.234.20.197', '114.234.12.139'
    ]
    # ['202.119.210.242', '23.99.220.247', '203.78.23.227', '51.95.212.72', '163.98.16.76',
    # '92.206.43.252', '202.66.205.237', '202.75.33.206', '202.75.33.114', '167.50.204.117']
    direction = 'src_dst'
    file_name = f'samplepoint-F_202007011400.pcap'
    for v in ips:
        if direction == 'src_dst':
            # filter pcap: tshark -r samplepoint-F_202007011400.pcap -w samplepoint-F_202007011400-src_dst_202.119.210.242.pcap ip.addr=='202.119.210.242'
            pcap_file = get_file_path(ipt_dir='original_data/reprst',
                                      dataset_name='MAWI/WIDE_2020',
                                      file_name=file_name)
        else:
            pcap_file = get_file_path(ipt_dir='original_data/reprst',
                                      dataset_name='MAWI/WIDE_2020',
                                      file_name=file_name +
                                      f'-src_dst_{v}.pcap')
        out_file = get_file_path(ipt_dir='original_data/reprst',
                                 dataset_name='MAWI/WIDE_2020',
                                 file_name=file_name +
                                 f'-{direction}_{v}.pcap')
        print(pcap_file, out_file)
        filter_ip(pcap_file,
                  out_file,
                  ips=[v],
                  direction=direction,
                  verbose=20)
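
The commented tshark command inside main_MAWI shows how the per-IP pcaps are pre-filtered. Below is a minimal sketch of that step as a Python wrapper; it assumes tshark is installed and on PATH, and the helper name and example paths are illustrative, not part of the original code.

import subprocess


def tshark_filter_ip(pcap_in, pcap_out, ip):
    # Keep only packets whose source or destination address equals `ip`,
    # mirroring: tshark -r <in>.pcap -w <out>.pcap ip.addr==<ip>
    subprocess.run(['tshark', '-r', pcap_in, '-w', pcap_out, f'ip.addr=={ip}'],
                   check=True)


# e.g. tshark_filter_ip('samplepoint-F_202007011400.pcap',
#                       'samplepoint-F_202007011400-src_dst_202.119.210.242.pcap',
#                       '202.119.210.242')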
Example #2
def get_scam2019_flows(in_dir=f'../Datasets/UCHI/IOT_2019',
                       out_dir='',
                       dataset_name='scam_192.168.143.42',
                       direction='src'):
    IP = '192.168.143.42'
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    file_name = 'fridge_cam_sound_ghome_2daysactiv-scam_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(
        normal_pcap,
        verbose=10)  # ~1000 normal flows; augmentation will generate > 1000 subflows
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows,
                                 step=10,
                                 max_interval=max_interval)
    lg.debug(f'normal_flows: {len(normal_flows)}')

    abnormal_pcap = os.path.join(out_dir, f'pc_{IP}_abnormal.pcap')
    check_path(abnormal_pcap)
    # file_name = 'samsung_camera-2daysactiv-src_192.168.143.42-anomaly.pca'
    file_name = 'fridge_cam_sound_ghome_2daysactiv-scam_abnormal.pcap'
    pcap_file = get_file_path(ipt_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, abnormal_pcap, ips=[IP], direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)
    abnormal_flows = augment_flows(abnormal_flows,
                                   step=1,
                                   max_interval=max_interval)
    lg.debug(f'after augmenting abnormal_flows: {len(abnormal_flows)}')
    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
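
A possible call to get_scam2019_flows is sketched below; the out_dir value and the printed fields are illustrative assumptions, not taken from the original code, and the pcap files must already exist under the given directories.

meta = get_scam2019_flows(in_dir='../Datasets/UCHI/IOT_2019',
                          out_dir='out/UCHI/IOT_2019',  # illustrative output dir
                          dataset_name='scam_192.168.143.42',
                          direction='src')
print(f"normal flows: {len(meta['normal_flows'])}, "
      f"abnormal flows: {len(meta['abnormal_flows'])}")
print(meta['normal_pcaps'], meta['abnormal_pcaps'])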
Example #3
    def get_ctu_flows(self, in_dir='../Datasets', direction='src'):
        """
        https://www.stratosphereips.org/datasets-iot
        Malware on IoT Dataset
        """
        self.normal_pcap = os.path.join(self.out_dir, f'pc_192.168.1.196.pcap')
        check_path(self.normal_pcap)
        # filter pcap
        # file_name = '2019-01-09-22-46-52-src_192.168.1.196_CTU_IoT_CoinMiner_anomaly.pcap'
        file_name = 'CTU-IoT-Malware-Capture-41-1_2019-01-09-22-46-52-192.168.1.196.pcap'
        pcap_file = get_file_path(in_dir=in_dir,
                                  dataset_name='CTU/IOT_2017',
                                  file_name=file_name)
        filter_ip(pcap_file,
                  self.normal_pcap,
                  ips=['192.168.1.196'],
                  direction=direction)
        normal_flows = _pcap2flows(self.normal_pcap,
                                   verbose=10)  # normal flows

        self.abnormal_pcap = os.path.join(self.out_dir,
                                          f'pc_192.168.1.195_abnormal.pcap')
        check_path(self.abnormal_pcap)
        # file_name = '2018-12-21-15-50-14-src_192.168.1.195-CTU_IoT_Mirai_normal.pcap'
        file_name = 'CTU-IoT-Malware-Capture-34-1_2018-12-21-15-50-14-192.168.1.195.pcap'
        pcap_file = get_file_path(ipt_dir=in_dir,
                                  dataset_name='CTU/IOT_2017',
                                  file_name=file_name)
        filter_ip(pcap_file,
                  self.abnormal_pcap,
                  ips=['192.168.1.195'],
                  direction=direction)
        abnormal_flows = _pcap2flows(self.abnormal_pcap,
                                     verbose=10)  # abnormal flows
        meta = {
            'normal_flows': normal_flows,
            'abnormal_flows': abnormal_flows,
            'normal_pcap': self.normal_pcap,
            'abnormal_pcap': self.abnormal_pcap,
            'direction': direction,
            'in_dir': in_dir
        }
        return meta
Example #4
def get_smtv2019_flows(in_dir=f'../Datasets/UCHI/IOT_2019',
                       out_dir='',
                       dataset_name='smtv_10.42.0.1',
                       direction='src'):
    IP = '10.42.0.1'
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    file_name = 'pc_10.42.0.1_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(normal_pcap, verbose=10)  # normal flows
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows,
                                 step=10,
                                 max_interval=max_interval)

    abnormal_pcap = os.path.join(out_dir, f'pc_10.42.0.119_abnormal.pcap')
    check_path(abnormal_pcap)
    file_name = 'pc_10.42.0.119_anomaly.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file,
              abnormal_pcap,
              ips=['10.42.0.119'],
              direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)  # abnormal flows
    abnormal_flows = augment_flows(abnormal_flows,
                                   step=10,
                                   max_interval=max_interval)
    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
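
The 0.9-quantile cap on flow duration recurs in several of the examples above. The self-contained sketch below spells out that computation; it assumes flows are (flow_key, packets) pairs with scapy-style packets carrying a .time attribute, which is how _get_flow_duration is presumed to measure duration.

import numpy as np


def estimate_max_interval(flows, q=0.9):
    # flows: list of (flow_key, packets) pairs; packets are ordered by time and
    # expose a .time attribute (scapy-style). Returns the q-quantile of the
    # first-to-last packet time spans, used as max_interval for augment_flows.
    durations = [float(pkts[-1].time - pkts[0].time) for _, pkts in flows]
    return np.quantile(durations, q=q)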
Example #5
def get_bstch2019_flows(in_dir=f'../Datasets/UCHI/IOT_2019',
                        out_dir='',
                        dataset_name='scam_192.168.143.48',
                        direction='src'):
    IP = '192.168.143.48'
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    # file_name = 'bose_soundtouch-2daysactiv-src_192.168.143.48-normal.pcap'
    file_name = 'fridge_cam_sound_ghome_2daysactiv-bstch_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(normal_pcap, verbose=10)  # normal flows
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows,
                                 step=10,
                                 max_interval=max_interval)

    abnormal_pcap = os.path.join(out_dir, f'pc_{IP}_abnormal.pcap')
    check_path(abnormal_pcap)
    # file_name = 'bose_soundtouch-2daysactiv-src_192.168.143.48-anomaly.pcap'
    file_name = 'fridge_cam_sound_ghome_2daysactiv-bstch_abnormal.pcap'
    pcap_file = get_file_path(ipt_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, abnormal_pcap, ips=[IP], direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)  # abnormal flows
    # abnormal_flows = augment_flows(abnormal_flows, starts=50, max_len=max_len)
    abnormal_flows = augment_flows(abnormal_flows,
                                   step=10,
                                   max_interval=max_interval)
    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
Example #6
	def get_mawi_flows(self, in_dir='../Datasets', direction='src'):

		self.normal_pcap = os.path.join(self.out_dir, f'pc_202.171.168.50.pcap')
		check_path(self.normal_pcap)
		file_name = 'samplepoint-F_201912071400-src_dst_202.171.168.50.pcap'
		pcap_file = get_file_path(in_dir=in_dir, dataset_name='MAWI/WIDE_2019',
		                          file_name=file_name)
		filter_ip(pcap_file, self.normal_pcap, ips=['202.171.168.50'], direction=direction)
		normal_flows = _pcap2flows(self.normal_pcap, verbose=10)  # normal  flows

		self.abnormal_pcap = os.path.join(self.out_dir, f'pc_203.113.113.16_abnormal.pcap')
		check_path(self.abnormal_pcap)
		# file_name = 'samplepoint-F_201912071400-src_dst_202.4.27.109.pcap'    # ~5000
		file_name = 'samplepoint-F_201912071400-src_203.113.113.16.pcap'  # ~1500
		pcap_file = get_file_path(ipt_dir=in_dir, dataset_name='MAWI/WIDE_2019',
		                          file_name=file_name)
		filter_ip(pcap_file, self.abnormal_pcap, ips=['203.113.113.16'], direction=direction)
		abnormal_flows = _pcap2flows(self.abnormal_pcap, verbose=10)  # abnormal flows
		meta = {'normal_flows': normal_flows, 'abnormal_flows': abnormal_flows,
		        'normal_pcap': self.normal_pcap, 'abnormal_pcap': self.abnormal_pcap,
		        'direction': direction, 'in_dir': in_dir}
		return meta
Example #7
    def get_unb_flows(self, in_dir='../Datasets', direction='src'):
        """Build the per-IP pcap and label files for UNB/CICIDS_2017 (Friday),
        then extract flows and split them into normal and abnormal sets."""

        # preprocess: derive the per-IP pcap and label files from the original capture and labels
        self.pcap_file = os.path.join(self.out_dir, f'pc_{self.IP}_AGMT.pcap')
        self.label_file = os.path.join(self.out_dir, f'pc_{self.IP}_AGMT.csv')
        remove_file(self.pcap_file, self.overwrite)
        remove_file(self.label_file, self.overwrite)
        check_path(self.pcap_file)
        check_path(self.label_file)

        if not os.path.exists(self.pcap_file) or not os.path.exists(
                self.label_file):
            # 1. original pcap
            friday_pcap_orig = get_file_path(
                ipt_dir=in_dir,
                dataset_name='UNB/CICIDS_2017/',
                data_cat='pcaps/Friday',
                file_name='Friday-WorkingHours.pcap')
            # filter pcap
            filter_ip(friday_pcap_orig,
                      out_file=self.pcap_file,
                      ips=[self.IP],
                      direction=self.direction,
                      keep_original=True)

            # 2. merge original labels
            friday_label = get_file_path(
                ipt_dir=self.out_dir,
                dataset_name='UNB/CICIDS_2017/',
                data_cat='labels/Friday',
                file_name='Friday-WorkingHours-Morning.pcap_ISCX.csv')
            friday_label_orig1 = get_file_path(
                ipt_dir=in_dir,
                dataset_name='UNB/CICIDS_2017/',
                data_cat='labels/Friday',
                file_name='Friday-WorkingHours-Morning.pcap_ISCX.csv')
            friday_label_orig2 = get_file_path(
                ipt_dir=in_dir,
                dataset_name='UNB/CICIDS_2017/',
                data_cat='labels/Friday',
                file_name='Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv')
            friday_label_orig3 = get_file_path(
                ipt_dir=in_dir,
                dataset_name='UNB/CICIDS_2017/',
                data_cat='labels/Friday',
                file_name='Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv'
            )
            friday_label_tmp = friday_label + '-all.csv'
            check_path(friday_label_tmp)
            merge_labels(
                [friday_label_orig1, friday_label_orig2, friday_label_orig3],
                mrg_label_path=friday_label_tmp)
            filter_csv_ip(friday_label_tmp,
                          out_file=self.label_file,
                          ips=[self.IP],
                          direction=self.direction)

        ##############################################################################################
        # step 2.1 extract flows
        flows = _pcap2flows(self.pcap_file,
                            verbose=10)  # normal and abnormal flows
        # step 2.2 split normal flow and abnormal flow
        labels = pd.read_csv(self.label_file).values
        normal_flows, abnormal_flows = split_normal_abnormal(flows, labels)
        # augment abnormal flows
        max_interval = np.quantile(
            [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
        abnormal_flows = augment_flows(abnormal_flows,
                                       step=1,
                                       max_interval=max_interval)
        meta = {
            'normal_flows': normal_flows,
            'abnormal_flows': abnormal_flows,
            'normal_pcap': self.pcap_file,
            'abnormal_pcap': self.label_file,
            'direction': direction,
            'in_dir': in_dir
        }

        return meta
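
The merge_labels / filter_csv_ip calls above combine the three Friday ISCX label files and keep only rows for self.IP. A rough pandas-based sketch of that step follows; it is not the library implementation, and the 'Source IP' / 'Destination IP' column names are assumptions (ISCX headers often carry stray whitespace, so they are stripped first).

import pandas as pd


def merge_and_filter_labels(csv_files, out_csv, ip, direction='src'):
    # Concatenate the per-period label CSVs, normalize header whitespace, and
    # keep only rows whose source (or either endpoint) matches `ip`.
    df = pd.concat([pd.read_csv(f) for f in csv_files], ignore_index=True)
    df.columns = [c.strip() for c in df.columns]
    if direction == 'src':
        mask = df['Source IP'] == ip
    else:
        mask = (df['Source IP'] == ip) | (df['Destination IP'] == ip)
    df[mask].to_csv(out_csv, index=False)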