예제 #1
0
import csv
from virus_total_apis import PublicApi as VT
from bat import log_to_dataframe, dataframe_to_matrix
import pandas as pd
import numpy as np
import sklearn
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
from sklearn.svm import OneClassSVM

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import tkinter
from geoip import geolite2

# Load the clean and the mixed Bro HTTP logs, then reset the index so each
# frame ends up with a default integer index.
clean_df = log_to_dataframe.LogToDataFrame(
    'bro/clean_traffic/http.log'
).reset_index()
mixed_df = log_to_dataframe.LogToDataFrame(
    'bro/mixed_traffic/http.log'
).reset_index()

# Names of the columns treated as features.
# NOTE: 'trans_depth' is deliberately not part of this list.
features = [
    'ts',
    'day',
    'id.resp_h',
    'id.resp_p',
    'method',
    'host',
    'user_agent',
    'request_body_len',
    'response_body_len',
    'status_code',
    'info_code',
]


def convert(ip):
예제 #2
0
    # If no args just call help
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Sanity check that this is a dns log
    if not args.bro_log.endswith('dns.log'):
        print('This example only works with Bro dns.log files..')
        sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Create a Pandas dataframe from the Bro log
        bro_df = log_to_dataframe.LogToDataFrame(args.bro_log)

        # Add query length
        bro_df['query_length'] = bro_df['query'].str.len()

        # Normalize this field
        ql = bro_df['query_length']
        bro_df['query_length_norm'] = (ql - ql.min()) / (ql.max()-ql.min())

        # These are the features we want (note some of these are categorical!)
        features = ['AA', 'RA', 'RD', 'TC', 'Z', 'rejected', 'proto', 'query',
                    'qclass_name', 'qtype_name', 'rcode_name', 'query_length_norm']
        feature_df = bro_df[features]

        # Use the super awesome DataframeToMatrix class (handles categorical data!)
        to_matrix = dataframe_to_matrix.DataFrameToMatrix()
예제 #3
0
#!/usr/bin/python3

import bat
from bat import log_to_dataframe, dataframe_to_matrix
import pandas as pd
import numpy as np
import sklearn
from sklearn.ensemble import IsolationForest
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import tkinter
from geoip import geolite2

# Read the two Bro conn.log captures into dataframes.
clean_df = log_to_dataframe.LogToDataFrame('bro/clean_traffic/conn.log')
mixed_df = log_to_dataframe.LogToDataFrame('bro/malicious_traffic/conn.log')

# Show the first rows of each frame (clean first, then mixed), one per line.
print(clean_df.head(), mixed_df.head(), sep='\n')

# Names of the columns treated as features.
# An alternative using only the endpoint addresses ('id.orig_h', 'id.resp_h')
# was left disabled in the original script.
features = [
    'ts',
    'day',
    'id.resp_h',
    'id.resp_p',
    'proto',
    'service',
    'duration',
    'orig_bytes',
    'resp_bytes',
    'local_orig',
    'local_resp',
    'orig_pkts',
    'resp_pkts',
]

# Reset the index, then keep only the rows whose service is not 'dns'.
clean_df = clean_df.reset_index().loc[lambda frame: frame.service != 'dns']
mixed_df = mixed_df.reset_index().loc[lambda frame: frame.service != 'dns']