Beispiel #1
0
def get_scam2019_flows(in_dir=f'../Datasets/UCHI/IOT_2019',
                       out_dir='',
                       dataset_name='scam_192.168.143.42',
                       direction='src'):
    """Extract normal and abnormal (sub)flows for the Samsung camera device.

    Filters the raw dataset pcaps down to traffic for the device IP,
    converts each filtered pcap to flows, and augments the flows into
    subflows bounded by the 90th percentile of normal flow durations.

    Args:
        in_dir: root directory holding the raw dataset pcaps.
        out_dir: directory where the filtered per-IP pcaps are written.
        dataset_name: sub-directory name used to locate the raw pcaps.
        direction: passed through to filter_ip (e.g. 'src'); semantics
            defined there.

    Returns:
        dict with 'normal_flows', 'abnormal_flows', the filtered pcap
        paths, 'direction', and 'in_dir'.
    """
    IP = '192.168.143.42'

    # --- normal traffic ---
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    file_name = 'fridge_cam_sound_ghome_2daysactiv-scam_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(
        normal_pcap,
        verbose=10)  # ~1000 normal flows, it will generate > 1000 subflows
    # 90th percentile of the normal flow durations bounds the subflow length.
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows,
                                 step=10,
                                 max_interval=max_interval)
    lg.debug(f'normal_flows: {len(normal_flows)}')

    # --- abnormal traffic ---
    abnormal_pcap = os.path.join(out_dir, f'pc_{IP}_abnormal.pcap')
    # BUG FIX: previously checked normal_pcap again; the abnormal output
    # path is the one about to be written.
    check_path(abnormal_pcap)
    file_name = 'fridge_cam_sound_ghome_2daysactiv-scam_abnormal.pcap'
    # NOTE(review): this call originally used ipt_dir= while the call above
    # used in_dir=; unified to in_dir= to match the first call — confirm
    # against get_file_path's signature.
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, abnormal_pcap, ips=[IP], direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)
    # step=1 oversamples the scarcer abnormal flows more densely than the
    # normal ones (step=10).
    abnormal_flows = augment_flows(abnormal_flows,
                                   step=1,
                                   max_interval=max_interval)
    lg.debug(f'after augmenting abnormal_flows: {len(abnormal_flows)}')

    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
Beispiel #2
0
def get_smtv2019_flows(in_dir=f'../Datasets/UCHI/IOT_2019',
                       out_dir='',
                       dataset_name='smtv_10.42.0.1',
                       direction='src'):
    """Extract normal and abnormal (sub)flows for the smart-TV device.

    Note the asymmetry: normal traffic is filtered on 10.42.0.1 while the
    abnormal pcap is filtered on 10.42.0.119.

    Args:
        in_dir: root directory holding the raw dataset pcaps.
        out_dir: directory where the filtered per-IP pcaps are written.
        dataset_name: sub-directory name used to locate the raw pcaps.
        direction: passed through to filter_ip (e.g. 'src').

    Returns:
        dict with 'normal_flows', 'abnormal_flows', the filtered pcap
        paths, 'direction', and 'in_dir'.
    """
    IP = '10.42.0.1'

    # --- normal traffic ---
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    file_name = 'pc_10.42.0.1_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(normal_pcap, verbose=10)  # normal  flows
    # 90th percentile of the normal flow durations bounds the subflow length.
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows,
                                 step=10,
                                 max_interval=max_interval)

    # --- abnormal traffic (different device IP: 10.42.0.119) ---
    abnormal_pcap = os.path.join(out_dir, f'pc_10.42.0.119_abnormal.pcap')
    # BUG FIX: previously checked normal_pcap again; the abnormal output
    # path is the one about to be written.
    check_path(abnormal_pcap)
    file_name = 'pc_10.42.0.119_anomaly.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file,
              abnormal_pcap,
              ips=['10.42.0.119'],
              direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)  # normal  flows
    abnormal_flows = augment_flows(abnormal_flows,
                                   step=10,
                                   max_interval=max_interval)

    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
Beispiel #3
0
def get_bstch2019_flows(in_dir=f'../Datasets/UCHI/IOT_2019',
                        out_dir='',
                        dataset_name='scam_192.168.143.48',
                        direction='src'):
    """Extract normal and abnormal (sub)flows for the Bose SoundTouch device.

    Args:
        in_dir: root directory holding the raw dataset pcaps.
        out_dir: directory where the filtered per-IP pcaps are written.
        dataset_name: sub-directory name used to locate the raw pcaps.
        direction: passed through to filter_ip (e.g. 'src').

    Returns:
        dict with 'normal_flows', 'abnormal_flows', the filtered pcap
        paths, 'direction', and 'in_dir'.
    """
    IP = '192.168.143.48'

    # --- normal traffic ---
    normal_pcap = os.path.join(out_dir, f'pc_{IP}.pcap')
    check_path(normal_pcap)
    file_name = 'fridge_cam_sound_ghome_2daysactiv-bstch_normal.pcap'
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, normal_pcap, ips=[IP], direction=direction)
    normal_flows = _pcap2flows(normal_pcap, verbose=10)  # normal  flows
    # 90th percentile of the normal flow durations bounds the subflow length.
    max_interval = np.quantile(
        [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
    normal_flows = augment_flows(normal_flows,
                                 step=10,
                                 max_interval=max_interval)

    # --- abnormal traffic ---
    abnormal_pcap = os.path.join(out_dir, f'pc_{IP}_abnormal.pcap')
    # BUG FIX: previously checked normal_pcap again; the abnormal output
    # path is the one about to be written.
    check_path(abnormal_pcap)
    file_name = 'fridge_cam_sound_ghome_2daysactiv-bstch_abnormal.pcap'
    # NOTE(review): this call originally used ipt_dir= while the call above
    # used in_dir=; unified to in_dir= to match the first call — confirm
    # against get_file_path's signature.
    pcap_file = get_file_path(in_dir=in_dir,
                              dataset_name=dataset_name,
                              file_name=file_name)
    filter_ip(pcap_file, abnormal_pcap, ips=[IP], direction=direction)
    abnormal_flows = _pcap2flows(abnormal_pcap, verbose=10)  # abnormal  flows
    abnormal_flows = augment_flows(abnormal_flows,
                                   step=10,
                                   max_interval=max_interval)

    meta = {
        'normal_flows': normal_flows,
        'abnormal_flows': abnormal_flows,
        'normal_pcaps': [normal_pcap],
        'abnormal_pcaps': [abnormal_pcap],
        'direction': direction,
        'in_dir': in_dir
    }
    return meta
Beispiel #4
0
    def generate(self):
        """Return (X, y), loading from the cached Xy_file when it exists,
        otherwise building the features from the pcap and caching them.

        Returns:
            (X, y): feature matrix and label vector (all zeros).
        """
        # Fast path: reuse the cached dataset on disk.
        if os.path.exists(self.Xy_file):
            self.X, self.y = load(self.Xy_file)
            return self.X, self.y

        q_interval = 0.9

        # pcap -> flows
        flows = self.pcap2flows(self.pcap_file)

        # flows -> subflows, split at the q-quantile of flow durations
        flow_labels = [1] * len(flows)
        durations = [_get_flow_duration(pkts) for fid, pkts in flows]
        interval = _get_split_interval(durations, q_interval=q_interval)
        subflows, flow_labels = self.flow2subflows(flows,
                                                   interval=interval,
                                                   labels=flow_labels)

        # Feature dimension: the same q-quantile, applied to the per-subflow
        # packet counts (only normal flows are present here).
        pkt_counts = [len(pkts) for fid, pkts in subflows]
        dim = int(np.floor(np.quantile(pkt_counts, q_interval)))
        lg.info(f'dim={dim}')

        # subflows -> fixed-size feature vectors
        features, fids = self.flow2features(subflows, name=self.feature_name)
        features = self.fix_feature(features, dim=dim)

        self.X = features
        self.y = np.asarray([0] * len(features))

        # Persist for later runs.
        check_path(os.path.dirname(self.Xy_file))
        dump((self.X, self.y), out_file=self.Xy_file)
        return self.X, self.y
Beispiel #5
0
def _get_SAMP(flows, name='SAMP_NUM', dim=None, header=False, header_dim=None):
    """Compute sampling-based (SAMP) features at several sampling rates.

    For each quantile q, the sampling rate is the q-quantile of the flow
    durations; features are then extracted at that rate and either
    FFT-transformed (names containing 'FFT') or padded/truncated to `dim`.

    Args:
        flows: iterable of (fid, pkts) pairs.
        name: one of 'SAMP_NUM', 'FFT_SAMP_NUM', 'SAMP_SIZE',
            'FFT_SAMP_SIZE'.
        dim: target feature dimension (FFT bin count for FFT variants).
        header: if True, append header-derived features to each row.
        header_dim: target dimension for the header features.

    Returns:
        (features_mp, fids_mp): dicts keyed by quantile q; features_mp[q]
        is (features, fids, sampling_rate), fids_mp[q] is fids.

    Raises:
        ValueError: if `name` is not a recognized feature name.
    """
    qs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]
    flow_durations = [_get_flow_duration(pkts) for fid, pkts in flows]
    features_mp = {}
    fids_mp = {}
    for q in qs:
        # Sampling rate = q-quantile of the flow durations.
        sampling_rate_ = np.quantile(flow_durations, q=q)
        if name in ['SAMP_NUM', 'FFT_SAMP_NUM']:
            features, fids = _get_SAMP_NUM(flows, sampling_rate_)
        elif name in ['SAMP_SIZE', 'FFT_SAMP_SIZE']:
            features, fids = _get_SAMP_SIZE(flows, sampling_rate_)
        else:
            # BUG FIX: previously fell through with `features`/`fids`
            # unbound (UnboundLocalError on the first iteration, silently
            # stale values afterwards).
            raise ValueError(f'unknown feature name: {name}')

        new_dim = dim
        if 'FFT' in name:
            features = _get_FFT_data(features,
                                     fft_bin=new_dim,
                                     fft_part='real')
            if header:
                header_features, header_fids = _get_header(flows)
                header_features = _get_FFT_data(
                    header_features, fft_bin=header_dim,
                    fft_part='real')  # 8 is the number of tcp_flg
                features = np.concatenate(
                    [features, header_features],
                    axis=1)  # concatanate feature and header
        else:
            features = _fix_data(features, new_dim)
            if header:
                header_features, header_fids = _get_header(flows)
                header_features = _fix_data(header_features, header_dim)
                features = np.concatenate(
                    [features, header_features],
                    axis=1)  # concatanate feature and header

        features_mp[q] = (features, fids, sampling_rate_)
        fids_mp[q] = (fids)
    return features_mp, fids_mp
Beispiel #6
0
    def _generate_flows(self):
        """Split the original normal/abnormal flows into subflows.

        Loads the cached subflows file when present (unless self.overwrite
        removes it first); otherwise splits both flow sets at the
        q_flow_dur-quantile of the normal flow durations, dumps the
        intermediate and final results to disk, and returns the meta dict.

        Returns:
            dict with subflow interval/duration statistics plus
            'normal_flows' and 'abnormal_flows' (now subflows).
        """
        self.subflows_file = os.path.join(self.out_dir,
                                          'normal_abnormal_subflows.dat')
        # Drop any stale cache when overwrite is requested, then reuse it
        # if it still exists.
        remove_file(self.subflows_file, self.overwrite)
        if os.path.exists(self.subflows_file):
            return load(self.subflows_file)

        # step 2: extract flows from pcap
        ##############################################################################################
        meta = load(self.orig_flows)
        normal_flows, abnormal_flows = meta['normal_flows'], meta[
            'abnormal_flows']
        lg.debug(
            f'original normal flows: {len(normal_flows)} and abnormal flows: {len(abnormal_flows)}'
        )
        # Packet-count statistics of the normal flows (diagnostics only).
        qs = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1]
        len_stat = np.quantile([len(pkts) for f, pkts in normal_flows], q=qs)
        lg.debug(
            f'flows: {len(normal_flows)}, length statistic: {len_stat}, when q = {qs}'
        )
        # Persist the pre-split flows alongside their length statistics.
        meta = {
            'flows': normal_flows,
            'len_stat': (len_stat, qs),
            'normal_flows': normal_flows,
            'abnormal_flows': abnormal_flows
        }
        dump(meta,
             out_file=os.path.join(self.out_dir, 'normal_abnormal_flows.dat'))

        # step 2.2. only get normal flows durations
        self.flows_durations = [
            _get_flow_duration(pkts) for (fids, pkts) in normal_flows
        ]
        normal_durations_stat = np.quantile(self.flows_durations, q=qs)
        lg.debug(f'normal_durations_stat: {normal_durations_stat}')
        # The subflow split interval is the q_flow_dur-quantile of the
        # normal flow durations.
        self.subflow_interval = np.quantile(
            self.flows_durations,
            q=self.q_flow_dur)  # median  of flow_durations
        lg.debug(
            f'---subflow_interval: {self.subflow_interval}, q_flow_dur: {self.q_flow_dur}'
        )
        # step 2.3 get subflows
        # Both flow sets are split with the interval derived from the
        # NORMAL flows only.
        normal_flows, _ = _flows2subflows(normal_flows,
                                          interval=self.subflow_interval,
                                          labels=['0'] * len(normal_flows))
        abnormal_flows, _ = _flows2subflows(abnormal_flows,
                                            interval=self.subflow_interval,
                                            labels=['1'] * len(abnormal_flows))
        lg.debug(
            f'normal_flows: {len(normal_flows)}, and abnormal_flows: {len(abnormal_flows)} '
            f'with interval: {self.subflow_interval} and q: {self.q_flow_dur}')
        meta = {
            'normal_flows_durations': self.flows_durations,
            'normal_durations_stat': (normal_durations_stat, qs),
            'subflow_interval': self.subflow_interval,
            'q_flow_dur': self.q_flow_dur,
            'normal_flows': normal_flows,
            'abnormal_flows': abnormal_flows
        }
        dump(meta, out_file=self.subflows_file)

        # only return subflows
        return meta
Beispiel #7
0
    def get_unb_flows(self, in_dir='../Datatsets', direction='src'):
        """Build normal/abnormal flows for the UNB CICIDS-2017 Friday data.

        Filters the Friday pcap and the merged Friday label CSVs down to
        self.IP, extracts flows, splits them into normal/abnormal using
        the labels, and augments the abnormal flows.

        Args:
            in_dir: root directory holding the UNB dataset.
            direction: stored in the returned meta; note the filtering
                itself uses self.direction, not this argument.

        Returns:
            dict with normal/abnormal flows, file paths, direction and
            in_dir.
        """
        # preprocessed the pcap and label on original pcap and label
        self.pcap_file = os.path.join(self.out_dir, f'pc_{self.IP}_AGMT.pcap')
        self.label_file = os.path.join(self.out_dir, f'pc_{self.IP}_AGMT.csv')
        # Honor self.overwrite by removing stale outputs first.
        remove_file(self.pcap_file, self.overwrite)
        remove_file(self.label_file, self.overwrite)
        check_path(self.pcap_file)
        check_path(self.label_file)

        if not os.path.exists(self.pcap_file) or not os.path.exists(
                self.label_file):
            # 1. original pcap
            friday_pacp_orig = get_file_path(
                ipt_dir=in_dir,
                dataset_name='UNB/CICIDS_2017/',
                data_cat='pcaps/Friday',
                file_name='Friday-WorkingHours.pcap')
            # filter pcap
            filter_ip(friday_pacp_orig,
                      out_file=self.pcap_file,
                      ips=[self.IP],
                      direction=self.direction,
                      keep_original=True)

            # 2. merge original labels
            # NOTE(review): this lookup uses ipt_dir=self.out_dir while the
            # three label lookups below use ipt_dir=in_dir, and its result
            # is only used to derive friday_label_tmp — confirm this is
            # intentional.
            friday_label = get_file_path(
                ipt_dir=self.out_dir,
                dataset_name='UNB/CICIDS_2017/',
                data_cat='labels/Friday',
                file_name='Friday-WorkingHours-Morning.pcap_ISCX.csv')
            friday_label_orig1 = get_file_path(
                ipt_dir=in_dir,
                dataset_name='UNB/CICIDS_2017/',
                data_cat='labels/Friday',
                file_name='Friday-WorkingHours-Morning.pcap_ISCX.csv')
            friday_label_orig2 = get_file_path(
                ipt_dir=in_dir,
                dataset_name='UNB/CICIDS_2017/',
                data_cat='labels/Friday',
                file_name='Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv')
            friday_label_orig3 = get_file_path(
                ipt_dir=in_dir,
                dataset_name='UNB/CICIDS_2017/',
                data_cat='labels/Friday',
                file_name='Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv'
            )
            # Merge the three Friday label CSVs, then filter to self.IP.
            friday_label_tmp = friday_label + '-all.csv'
            check_path(friday_label_tmp)
            merge_labels(
                [friday_label_orig1, friday_label_orig2, friday_label_orig3],
                mrg_label_path=friday_label_tmp)
            filter_csv_ip(friday_label_tmp,
                          out_file=self.label_file,
                          ips=[self.IP],
                          direction=self.direction)

        ##############################################################################################
        # step 2.1 extract flows
        flows = _pcap2flows(self.pcap_file,
                            verbose=10)  # normal and abnormal flows
        # step 2.2 split normal flow and abnormal flow
        labels = pd.read_csv(self.label_file).values  #
        normal_flows, abnormal_flows = split_normal_abnormal(flows, labels)
        # augment abnormal flows
        # Subflow length bound: 90th percentile of NORMAL flow durations.
        max_interval = np.quantile(
            [_get_flow_duration(pkts) for f, pkts in normal_flows], q=0.9)
        abnormal_flows = augment_flows(abnormal_flows,
                                       step=1,
                                       max_interval=max_interval)
        # NOTE(review): 'abnormal_pcap' maps to the label CSV, not a pcap —
        # looks like a copy-paste slip; confirm what downstream consumers
        # expect before changing it.
        meta = {
            'normal_flows': normal_flows,
            'abnormal_flows': abnormal_flows,
            'normal_pcap': self.pcap_file,
            'abnormal_pcap': self.label_file,
            'direction': direction,
            'in_dir': in_dir
        }

        return meta