Beispiel #1
0
def get_scam2019_flows(in_dir='../Datasets/UCHI/IOT_2019',
                       out_dir='',
                       dataset_name='scam_192.168.143.42',
                       direction='src'):
    """Extract and augment normal/abnormal flows for host 192.168.143.42.

    The normal and the abnormal capture are each filtered down to the traffic
    of the target IP, converted to flows and augmented. The augmentation
    interval is the 0.9 quantile of the normal flow durations and is reused
    for the abnormal flows.

    Parameters
    ----------
    in_dir : str
        Root directory of the raw datasets.
    out_dir : str
        Directory in which the IP-filtered pcap paths are built.
    dataset_name : str
        Dataset sub-directory handed to ``get_file_path``.
    direction : str
        Direction handed to ``filter_ip``; also echoed in the result.

    Returns
    -------
    dict
        ``normal_flows``, ``abnormal_flows``, ``normal_pcaps``,
        ``abnormal_pcaps``, ``direction`` and ``in_dir``.
    """
    IP = '192.168.143.42'

    # ---- normal traffic ----
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    file_name = 'fridge_cam_sound_ghome_2daysactiv-scam_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(
        normal_pcap,
        verbose=10)  # ~1000 normal flows, it will generate > 1000 subflows
    # The interval is derived from the normal traffic only and shared with the
    # abnormal augmentation below.
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for _, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows,
                                 step=10,
                                 max_interval=max_interval)
    lg.debug(f'normal_flows: {len(normal_flows)}')

    # ---- abnormal traffic ----
    abnormal_pcap = os.path.join(out_dir, f'pc_{IP}_abnormal.pcap')
    # Check the abnormal output path (the normal one was re-checked here before,
    # which left the abnormal path unchecked).
    check_path(abnormal_pcap)
    # file_name = 'samsung_camera-2daysactiv-src_192.168.143.42-anomaly.pca'
    file_name = 'fridge_cam_sound_ghome_2daysactiv-scam_abnormal.pcap'
    # NOTE(review): this call uses the keyword `ipt_dir` while the normal branch
    # uses `in_dir`; kept as-is because get_file_path's signature is not visible
    # here - confirm which keyword it expects.
    pcap_file = get_file_path(ipt_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, abnormal_pcap, ips=[IP], direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)
    # Abnormal flows use a finer step (1) than the normal flows (10).
    abnormal_flows = augment_flows(abnormal_flows,
                                   step=1,
                                   max_interval=max_interval)
    lg.debug(f'after augmenting abnormal_flows: {len(abnormal_flows)}')

    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
Beispiel #2
0
def get_smtv2019_flows(in_dir='../Datasets/UCHI/IOT_2019',
                       out_dir='',
                       dataset_name='smtv_10.42.0.1',
                       direction='src'):
    """Extract and augment normal/abnormal flows for the ``smtv`` dataset.

    Normal traffic is filtered on 10.42.0.1 and abnormal traffic on
    10.42.0.119. Both are converted to flows and augmented with the same
    interval: the 0.9 quantile of the normal flow durations.

    Parameters
    ----------
    in_dir : str
        Root directory of the raw datasets.
    out_dir : str
        Directory in which the IP-filtered pcap paths are built.
    dataset_name : str
        Dataset sub-directory handed to ``get_file_path``.
    direction : str
        Direction handed to ``filter_ip``; also echoed in the result.

    Returns
    -------
    dict
        ``normal_flows``, ``abnormal_flows``, ``normal_pcaps``,
        ``abnormal_pcaps``, ``direction`` and ``in_dir``.
    """
    IP = '10.42.0.1'

    # ---- normal traffic ----
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    file_name = 'pc_10.42.0.1_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(normal_pcap, verbose=10)  # normal flows
    # The interval is derived from the normal traffic only and shared with the
    # abnormal augmentation below.
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for _, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows,
                                 step=10,
                                 max_interval=max_interval)

    # ---- abnormal traffic (different host: 10.42.0.119) ----
    abnormal_pcap = os.path.join(out_dir, 'pc_10.42.0.119_abnormal.pcap')
    # Check the abnormal output path (the normal one was re-checked here before,
    # which left the abnormal path unchecked).
    check_path(abnormal_pcap)
    file_name = 'pc_10.42.0.119_anomaly.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file,
              abnormal_pcap,
              ips=['10.42.0.119'],
              direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)  # abnormal flows
    abnormal_flows = augment_flows(abnormal_flows,
                                   step=10,
                                   max_interval=max_interval)

    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
Beispiel #3
0
def get_bstch2019_flows(in_dir='../Datasets/UCHI/IOT_2019',
                        out_dir='',
                        dataset_name='scam_192.168.143.48',
                        direction='src'):
    """Extract and augment normal/abnormal flows for host 192.168.143.48.

    The normal and the abnormal capture are each filtered down to the traffic
    of the target IP, converted to flows and augmented with the same
    interval: the 0.9 quantile of the normal flow durations.

    Parameters
    ----------
    in_dir : str
        Root directory of the raw datasets.
    out_dir : str
        Directory in which the IP-filtered pcap paths are built.
    dataset_name : str
        Dataset sub-directory handed to ``get_file_path``.
        NOTE(review): the default carries a ``scam_`` prefix although the
        file names used below contain ``bstch`` - confirm this is intended.
    direction : str
        Direction handed to ``filter_ip``; also echoed in the result.

    Returns
    -------
    dict
        ``normal_flows``, ``abnormal_flows``, ``normal_pcaps``,
        ``abnormal_pcaps``, ``direction`` and ``in_dir``.
    """
    IP = '192.168.143.48'

    # ---- normal traffic ----
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    # file_name = 'bose_soundtouch-2daysactiv-src_192.168.143.48-normal.pcap'
    file_name = 'fridge_cam_sound_ghome_2daysactiv-bstch_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(normal_pcap, verbose=10)  # normal flows
    # The interval is derived from the normal traffic only and shared with the
    # abnormal augmentation below.
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for _, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows,
                                 step=10,
                                 max_interval=max_interval)

    # ---- abnormal traffic ----
    abnormal_pcap = os.path.join(out_dir, f'pc_{IP}_abnormal.pcap')
    # Check the abnormal output path (the normal one was re-checked here before,
    # which left the abnormal path unchecked).
    check_path(abnormal_pcap)
    # file_name = 'bose_soundtouch-2daysactiv-src_192.168.143.48-anomaly.pcap'
    file_name = 'fridge_cam_sound_ghome_2daysactiv-bstch_abnormal.pcap'
    # NOTE(review): this call uses the keyword `ipt_dir` while the normal branch
    # uses `in_dir`; kept as-is because get_file_path's signature is not visible
    # here - confirm which keyword it expects.
    pcap_file = get_file_path(ipt_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, abnormal_pcap, ips=[IP], direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)  # abnormal flows
    abnormal_flows = augment_flows(abnormal_flows,
                                   step=10,
                                   max_interval=max_interval)

    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
Beispiel #4
0
    def get_unb_flows(self, in_dir='../Datatsets', direction='src'):
        """Prepare the CICIDS-2017 Friday data for ``self.IP`` and split its flows.

        The IP-filtered pcap and label CSV are (re)built under ``self.out_dir``
        when either one is missing; the pcap is then converted to flows, the
        flows are split into normal/abnormal using the labels, and the
        abnormal flows are augmented.

        Parameters
        ----------
        in_dir : str
            Root directory holding the original pcap and label files.
        direction : str
            Only echoed in the returned dict; the filtering itself uses
            ``self.direction``.

        Returns
        -------
        dict
            ``normal_flows``, ``abnormal_flows``, ``normal_pcap`` (the filtered
            pcap path), ``abnormal_pcap`` (the filtered label CSV path),
            ``direction`` and ``in_dir``.
        """
        dataset = 'UNB/CICIDS_2017/'
        label_cat = 'labels/Friday'
        morning_csv = 'Friday-WorkingHours-Morning.pcap_ISCX.csv'

        # Target files: filtered pcap and filtered labels for this IP.
        self.pcap_file = os.path.join(self.out_dir, f'pc_{self.IP}_AGMT.pcap')
        self.label_file = os.path.join(self.out_dir, f'pc_{self.IP}_AGMT.csv')
        for target in (self.pcap_file, self.label_file):
            remove_file(target, self.overwrite)
        for target in (self.pcap_file, self.label_file):
            check_path(target)

        have_both = (os.path.exists(self.pcap_file)
                     and os.path.exists(self.label_file))
        if not have_both:
            # 1. filter the original Friday capture down to the target IP
            source_pcap = get_file_path(ipt_dir=in_dir,
                                        dataset_name=dataset,
                                        data_cat='pcaps/Friday',
                                        file_name='Friday-WorkingHours.pcap')
            filter_ip(source_pcap,
                      out_file=self.pcap_file,
                      ips=[self.IP],
                      direction=self.direction,
                      keep_original=True)

            # 2. merge the three original label files, then filter by IP.
            # The merged file path is built under self.out_dir, not in_dir.
            merged_base = get_file_path(ipt_dir=self.out_dir,
                                        dataset_name=dataset,
                                        data_cat=label_cat,
                                        file_name=morning_csv)
            source_labels = [
                get_file_path(ipt_dir=in_dir,
                              dataset_name=dataset,
                              data_cat=label_cat,
                              file_name=csv_name)
                for csv_name in (
                    morning_csv,
                    'Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv',
                    'Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv',
                )
            ]
            merged_labels = merged_base + '-all.csv'
            check_path(merged_labels)
            merge_labels(source_labels, mrg_label_path=merged_labels)
            filter_csv_ip(merged_labels,
                          out_file=self.label_file,
                          ips=[self.IP],
                          direction=self.direction)

        # Extract all flows (normal and abnormal mixed) from the filtered pcap.
        all_flows = _pcap2flows(self.pcap_file, verbose=10)
        # Split them using the filtered label table.
        label_values = pd.read_csv(self.label_file).values
        normal_flows, abnormal_flows = split_normal_abnormal(
            all_flows, label_values)

        # Only the abnormal flows are augmented; the interval is the 0.9
        # quantile of the normal flow durations.
        durations = [_get_flow_duration(pkts) for _, pkts in normal_flows]
        max_interval = np.quantile(durations, q=0.9)
        abnormal_flows = augment_flows(abnormal_flows,
                                       step=1,
                                       max_interval=max_interval)

        return {
            'normal_flows': normal_flows,
            'abnormal_flows': abnormal_flows,
            'normal_pcap': self.pcap_file,
            'abnormal_pcap': self.label_file,
            'direction': direction,
            'in_dir': in_dir
        }