Ejemplo n.º 1
0
    def test_sum(self):
        """Print every item of the final list produced by ``utils.get_sum``.

        The input to ``get_sum`` is the result of
        ``utils.get_time_range_dtl`` for a fixed transaction date and
        debit/credit flag.
        """
        details = utils.get_time_range_dtl('20191127', '2')
        for entry in utils.get_sum(details):
            print(entry)
Ejemplo n.º 2
0
# -*- coding: utf-8 -*-
# @Time    : 2020/4/24 19:02
# @Author  : Mqz
# @FileName: main.py

# main.py

# Demo script: call a helper function and two helper classes and print
# their results.
from utils import get_sum
# NOTE(review): wildcard import -- Encoder and Decoder used below are
# presumably provided by class_utils; confirm and consider importing them
# explicitly.
from class_utils import *

# Print the result of get_sum for the arguments 1 and 2.
print(get_sum(1, 2))

encoder = Encoder()
decoder = Decoder()

# Print the encoded form of 'abcde' and the decoded form of 'edcba'.
print(encoder.encode('abcde'))
print(decoder.decode('edcba'))

# ########## Output ##########
#
# 3
# edcba
# abcde
Ejemplo n.º 3
0
#! /usr/bin/python3
# coding=utf-8

# Import get_sum from the utils module and print its result for the
# arguments 1 and 2, prefixed with a label.
from utils import get_sum
print('get_sum: ', get_sum(1, 2))
Ejemplo n.º 4
0
    def process(self):
        """Fill missing values, derive features and encode ``self.df_all``.

        Steps, in order:

        1. Fill NaN with ``-1`` in every column listed in
           ``self.nanum_columns`` and with ``""`` in every column listed in
           ``self.nastr_columns``.
        2. Add derived columns, each computed row by row through a ``utils``
           helper: IP scope, IP zones 1-4 and their concatenations, pairwise
           concatenations of categorical columns, ``timestamp_hour``, end
           hour/minute/second, several sums and two ``thrcnt_*`` ratios.
        3. Label-encode every column listed in ``self.le_columns``.
        4. Replace every column listed in ``self.oe_columns`` by its one-hot
           (dummy) columns, named ``<column>_<value>`` with the value
           lower-cased and whitespace runs replaced by ``_``.

        Side effects: ``self.df_all`` is modified and finally rebound, the
        names of the concatenated columns are appended to
        ``self.le_columns``, and progress messages are printed.

        Returns:
            None.
        """
        # Fill missing values. The filled column is assigned back instead of
        # calling ``fillna(..., inplace=True)`` on ``self.df_all[column]``:
        # that chained form operates on an intermediate object and does not
        # update the frame when pandas Copy-on-Write is active.
        for column in self.nanum_columns:
            print("Fill NA {}".format(column))
            self.df_all[column] = self.df_all[column].fillna(-1)

        for column in self.nastr_columns:
            print("Fill NA {}".format(column))
            self.df_all[column] = self.df_all[column].fillna("")

        # NOTE: every lambda below is consumed immediately by ``apply`` inside
        # the same loop iteration, so closing over loop variables is safe.

        # IP scope features (destination first, then source, to keep the
        # original column order).
        for target, source in (
                ("dstipscope_dominate", "dstipcategory_dominate"),
                ("srcipscope_dominate", "srcipcategory_dominate")):
            self.df_all[target] = self.df_all.apply(
                lambda row: utils.get_ip_scope(row[source]), axis=1)

        # IP zone features: ip_zone_1 .. ip_zone_4.
        for level in range(1, 5):
            self.df_all["ip_zone_{}".format(level)] = self.df_all.apply(
                lambda row: utils.get_ip_zone(row["ip"], level), axis=1)

        # Concatenations of IP zones: ip_zone_12, ip_zone_123, ip_zone_34 and
        # ip_zone_234. Each new column is registered for label encoding.
        for zones in ((1, 2), (1, 2, 3), (3, 4), (2, 3, 4)):
            name = "ip_zone_" + "".join(str(zone) for zone in zones)
            parts = ["ip_zone_{}".format(zone) for zone in zones]
            self.df_all[name] = self.df_all.apply(
                lambda row: utils.concatenate_values(
                    [row[part] for part in parts]),
                axis=1)
            self.le_columns.append(name)

        # Pairwise concatenations of existing columns, also label-encoded.
        feature_pairs = [("categoryname", "ipcategory_scope"),
                         ("categoryname", "overallseverity"),
                         ("srcipscope_dominate", "dstipscope_dominate")]
        for f1, f2 in feature_pairs:
            fn = f1 + "_" + f2
            self.df_all[fn] = self.df_all.apply(
                lambda row: utils.concatenate_values([row[f1], row[f2]]),
                axis=1)
            self.le_columns.append(fn)

        # Duration feature derived from timestamp_dist.
        self.df_all["timestamp_hour"] = self.df_all.apply(
            lambda row: utils.get_duration(row["timestamp_dist"]), axis=1)

        # Ending time features: end_hour, end_minute, end_second.
        for unit in ("hour", "minute", "second"):
            self.df_all["end_" + unit] = self.df_all.apply(
                lambda row: utils.get_end_time(
                    row["start_hour"], row["start_minute"],
                    row["start_second"], row["timestamp_dist"], unit),
                axis=1)

        # Sum features: each target column is utils.get_sum over the listed
        # source columns of the row.
        sum_features = (
            ("sum_score", ["{}score".format(score) for score in
                           ["untrust", "flow", "trust", "enforcement"]]),
            ("sum_n", ["n{}".format(i) for i in range(1, 11)]),
            ("sum_p5", ["p5{}".format(p5) for p5 in ["m", "w", "d"]]),
            ("sum_p8", ["p8{}".format(p8) for p8 in ["m", "w", "d"]]),
        )
        for name, sources in sum_features:
            self.df_all[name] = self.df_all.apply(
                lambda row: utils.get_sum([row[src] for src in sources]),
                axis=1)
        # NOTE: a combined "sum_p58" (sum_p5 + sum_p8) feature and a
        # "thrcnt_month_week" ratio were previously computed here and are
        # intentionally disabled.

        # Ratio features: thrcnt_month_day and thrcnt_week_day.
        for period in ("month", "week"):
            count_column = "thrcnt_" + period
            self.df_all[count_column + "_day"] = self.df_all.apply(
                lambda row: utils.get_ratio(row[count_column],
                                            row["thrcnt_day"]),
                axis=1)

        # Encode features with a label encoder (re-fitted per column).
        label_encoder = LabelEncoder()
        for column in self.le_columns:
            print("Label encoding {}".format(column))
            self.df_all[column] = label_encoder.fit_transform(
                self.df_all[column])

        # Encode features with a one-hot encoder: the source column is
        # dropped and replaced by its dummy columns.
        for column in self.oe_columns:
            print("One-hot encoding {}".format(column))
            pd_encoded = pd.get_dummies(self.df_all[column])
            pd_encoded.columns = [
                "{}_{}".format(column, "_".join(str(col).lower().split()))
                for col in pd_encoded.columns
            ]
            self.df_all.drop(column, axis=1, inplace=True)
            self.df_all = pd.concat([self.df_all, pd_encoded], axis=1)