Example #1
def make_frequency_dict(self, text):
    # the commented lines show the earlier inline implementation;
    # counting is now delegated to the Entropy helper
    # frequency = {}
    # for character in text:
    #     if character not in frequency:
    #         frequency[character] = 0
    #     frequency[character] += 1
    ent = Entropy(self.path)
    self.symbols_count = ent.symbols_count
    return ent.freq
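
Examples #1 and #6 delegate the counting to an external Entropy class whose source is not shown. A minimal sketch consistent with this usage, assuming `freq` maps each symbol to its count and `symbols_count` is the total number of symbols; the file handling and the Shannon formula below are reconstructions, not the original implementation:

import math
from collections import Counter

class Entropy:
    def __init__(self, path):
        with open(path, 'rb') as f:
            data = f.read()
        self.freq = Counter(data)       # symbol -> occurrence count
        self.symbols_count = len(data)  # total number of symbols read

    def value(self):
        # Shannon entropy in bits: H = -sum(p * log2(p))
        total = float(self.symbols_count)
        return -sum((n / total) * math.log2(n / total)
                    for n in self.freq.values())
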
Example #2
import numpy as np

def GetEntropy(occlusions):
    # XSize and YSize are module-level grid dimensions in the original code
    I = np.zeros((XSize, YSize))
    for coord in occlusions:
        I[coord.X, coord.Y] = 1
    entropy = Entropy(I)
    outputMatrix = entropy.MovingWindowFilter(entropy.MovingAverage, 1)
    filteredMatrices = [outputMatrix]
    profile = entropy.Profile(filteredMatrices)

    return profile
Example #3
def entropy_for_feature(self, feature_number):
    unique_feature_values = [0, 1]

    entropy = 0.0

    for value in unique_feature_values:
        sub_features_list, sub_labels_list = \
            DataSetSplitter(self.features_list, self.labels_list,
                            feature_number, value).new_data_set()

        probability = sub_features_list.shape[0] / float(
            self.data_set_entries_count)
        entropy += probability * Entropy(sub_features_list,
                                         sub_labels_list).value()

    return entropy
Example #4
def __entropy_for_feature(self, feature_number):
    feature_list = [example[feature_number] for example in self.data_set]
    unique_feature_values = set(feature_list)

    entropy = 0.0

    for value in unique_feature_values:
        sub_data_set = DataSetSplitter(self.data_set, feature_number,
                                       value).new_data_set()

        probability = len(sub_data_set) / float(
            self.data_set_entries_count)
        entropy += probability * Entropy(sub_data_set).value()

    return entropy
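
Both variants compute the conditional-entropy term of ID3's information gain: the entropy of the labels within each split of the data set, weighted by the probability of landing in that split. A standalone sketch of the same computation, assuming a plain list of rows with the class label in the last column (the names here are illustrative, not from the original repo):

import math
from collections import Counter

def conditional_entropy(rows, feature):
    # H(label | feature): weighted entropy of the label within each split
    total = float(len(rows))
    result = 0.0
    for value in set(row[feature] for row in rows):
        subset = [row for row in rows if row[feature] == value]
        probability = len(subset) / total
        counts = Counter(row[-1] for row in subset)
        entropy = -sum((n / len(subset)) * math.log2(n / len(subset))
                       for n in counts.values())
        result += probability * entropy
    return result
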
Example #5
from pox.core import core
from pox.lib.packet.ipv4 import ipv4
from pox.lib.packet.arp import arp
from pox.lib.addresses import IPAddr, EthAddr
from pox.lib.util import str_to_bool, dpid_to_str
from pox.lib.recoco import Timer

import pox.openflow.libopenflow_01 as of

from pox.lib.revent import *
import itertools
import time

from entropy import Entropy

my_dictionary = {}
my_entropy = Entropy()
set_Timer = False
defendDDOS = False

log = core.getLogger()
FLOW_IDLE_TIMEOUT = 10
ARP_TIMEOUT = 60 * 2
MAX_BUFFERED_PER_IP = 5
MAX_BUFFER_TIME = 5


class Entry(object):
    def __init__(self, port, mac):
        self.timeout = time.time() + ARP_TIMEOUT
        self.port = port
        self.mac = mac
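
The module-level state above (my_dictionary, set_Timer, defendDDOS) points at the classic entropy-based DDoS heuristic: count destination IPs per time window and raise a flag when the entropy of that distribution collapses, since a flood concentrates traffic on one victim. A hedged sketch of the check itself; the threshold value and function names are assumptions, not part of the POX module shown:

import math

def window_entropy(ip_counts):
    # Shannon entropy of the destination-IP distribution in one window
    total = float(sum(ip_counts.values()))
    if total == 0:
        return 0.0
    return -sum((n / total) * math.log2(n / total)
                for n in ip_counts.values())

def looks_like_ddos(ip_counts, threshold=0.5):
    # low entropy = traffic concentrated on few destinations
    return window_entropy(ip_counts) < threshold
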
Example #6
def make_frequency_dict(self, text):
    ent = Entropy(self.path)
    ent.HBA()
    return ent.pairs
Example #7
def Construct_Vector(mystr, conn):
    vec = []

    removed_protocol = re.sub(
        r'^https?://', '',
        mystr)  # strip the protocol prefix from the URL

    vec.append(len(removed_protocol))  # append length of URL to the Vector
    vec.append(Total_Dots(
        removed_protocol))  # append Number of Dots in URL to the Vector

    # Checking for Presence of Suspicious Words in URL
    for i in Suspicious_Words:
        if re.search(i, removed_protocol, re.IGNORECASE):
            vec.append(1)  # security sensitive word present so append 1
            break
    else:
        vec.append(0)  # security sensitive word not present so append 0

    patt = r'^[^/]*'  # pattern to extract domain from the URL
    patt_path = r'/[^/]*'  # pattern to extract path of URL
    dom = re.match(patt, removed_protocol).group(0)
    info = re.findall(patt_path, removed_protocol)
    # print('Domain Name: ',dom)
    dom_hyph_count = no_of_hyphens_in_domain(dom)
    vec.append(int(dom_hyph_count)
               )  # Appending Number of hyphens in Domain of URL to the Vector

    domain_tokens = dom.split('.')  # split the domain by the periods
    domain_tokens = [x for x in domain_tokens
                     if x != '']  # Removing Null Values (if Any)
    # print('Domain Length: ',len(dom))

    path_tokens = [re.sub('/', '', x) for x in info]
    if path_tokens != []:
        file_n_args = path_tokens[-1]
    else:
        file_n_args = ''
    path_tokens = path_tokens[:-1]
    info = [x for x in info if x != '']
    slashes = len(info)
    # print('Slashes:',slashes)
    dir_len = 0
    for i in path_tokens:
        dir_len += len(i)
    dir_len += slashes
    vec.append(
        int(dir_len))  # Appending Directory length to the URL to the Vector
    # print('Directory Length: ',dir_len)

    num_subdir = len(path_tokens)
    # print('Number of Subdirectories :',num_subdir)
    vec.append(
        num_subdir
    )  # Appending Number of Subdirectories Present in the URL to the Vector
    # print('Path Tokens : ',path_tokens)

    TLD = domain_tokens[-1]
    # print('Top Level Domain :',TLD)
    vec.append(len(dom))  # Domain Length
    vec.append(len(domain_tokens))  # Domain Token Count
    vec.append(len(path_tokens))  # Path Token Count

    # does the url contain an IP address
    has_ip = ip_presence(removed_protocol)
    vec.append(has_ip)  # Presence of ip address Yes:1, No:0

    # get the alexa page rank
    has_alexa_rank = alexa_pagerank(dom, conn)
    vec.append(has_alexa_rank)

    # does page use ssl
    uses_https = check_https(mystr)
    vec.append(uses_https)

    # get country code and domain age calc
    country_code, dom_age_gt_1year = get_ip_info(dom)
    vec.append(country_code)

    # domain age gt 1 year
    vec.append(dom_age_gt_1year)

    # bag of words for word occurrences
    word = bag_of_words(mystr)
    vec.append(word)

    # entropy of URL
    ent = Entropy(mystr)
    entropy = ent.H(mystr)
    vec.append(entropy)

    # count of special characters
    characters = special_chars(mystr)
    vec.append(characters)

    domain_tok_lengths = []
    for i in domain_tokens:
        domain_tok_lengths.append(len(i))
    largest_dom_token_len = max(domain_tok_lengths)
    vec.append(largest_dom_token_len)  # Largest Domain Token Length

    avg_dom_Tok_len = round(
        (float(sum(domain_tok_lengths)) / len(domain_tok_lengths)), 2)

    vec.append(avg_dom_Tok_len)  # Average Domain Token Length

    path_tok_lengths = []
    path_tok_dots = 0
    path_tok_delims = 0
    avg_path_Tok_len = 0
    largest_path_token_len = 0
    if len(path_tokens):
        for i in path_tokens:
            path_tok_lengths.append(len(i))
            path_tok_dots = Total_Dots(i)
            path_tok_delims = Total_Delims(i)
        avg_path_Tok_len = round(
            (float(sum(path_tok_lengths)) / len(path_tok_lengths)), 2)
        largest_path_token_len = max(path_tok_lengths)
        vec.append(largest_path_token_len)  # Largest Path Token Length
        vec.append(avg_path_Tok_len)  # Average Path Token Length
    else:
        vec.append(largest_path_token_len
                   )  # Largest Path Token Length :0 (No, Path Tokens)
        vec.append(
            avg_path_Tok_len)  # Average Path Token Length :0 (No, Path Tokens)
    # print('Largest Path Token Length:',largest_path_token_len)
    # print('Path Token Total Dots:',path_tok_dots)
    # print('Path Token Delims:',path_tok_delims)
    if has_ip:
        vec.append(0)  # Ip address present so no suspicious TLD
    else:
        for i in Suspicious_TLD:
            if re.search(i, TLD, re.IGNORECASE):
                vec.append(1)  # Suspicious TLD
                break
        else:
            vec.append(0)  # Non Suspicious TLD
    if file_n_args != '':

        # file_n_args holds the last path segment: a file name such as
        # index.html, optionally followed by POST-style arguments after the '?'
        tmp = file_n_args.split('?')
        file = tmp[0]
        if len(tmp) > 1:
            args = tmp[1]
        else:
            args = ''
        # print('File:',file)
        # print('Arguments:',args)
        if not file:
            vec.append(0)
        else:
            vec.append(1)
        vec.append(len(file))  # Length of file
        vec.append(Total_Dots(file))  # Total_Dots in file name
        vec.append(Total_Delims(file))  # Total_Delims in file name
        # print('Total dots in file: ',Total_Dots(file))
        # print('Total Delims in file: ',Total_Delims(file))

        if args == '':
            # Checking if any POST arguments present in the URL or not
            vec.append(0)  # no arguments present in url
            vec.append(0)  # Length of Argument Appended to the Vector
            vec.append(0)  # Number of Variables Appended to the Vector
            vec.append(
                0)  # Length of largest variable value Appended to the Vector
            vec.append(0)  # Maximum number of Delims Appended to the Vector
        # print('argument length:',0)
        # print('number of arguments:',0)
        # print('length of Largest variable value:',0)
        # print('Maximum no of delims:',0)

        else:
            # indicates the presence of POST arguments in the URL
            vec.append(1)  # arguments are present
            vec.append(len(args) +
                       1)  # Length of Argument Appended to the Vector
            # print('argument length:',len(args)+1)
            arb = args.split('&')
            vec.append(len(arb))  # Number of Arguments Appended to the Vector
            # print('Number of arguments',len(arb))
            len_var = []
            max_delim = []
            for i in arb:
                # Splitting POST arguments around the '=' sign
                tmp = i.split('=')
                if len(tmp) > 1:
                    len_var.append(len(tmp[1]))
                    max_delim.append(Total_Delims(tmp[0]))
                    max_delim.append(Total_Delims(tmp[1]))
                else:
                    len_var.append(0)
                    max_delim.append(0)
            vec.append(max(len_var))  # Length of Largest variable value
            # print('length of Largest variable value:',max(len_var))
            max_delim = max(max_delim)
            vec.append(max_delim)  # Maximum number of Delimiters

        # print('Maximum no of delims:',max_delim)

    else:

        # File and arguments are not present in the URL, so append 0 for
        # each of the corresponding features in the vector
        vec.append(0)  # has file name in url
        vec.append(0)  # Length of file Appended to the Vector
        vec.append(0)  # Total_Dots in file name Appended to the Vector
        vec.append(0)  # Total_Delims in file name Appended to the Vector
        vec.append(0)  # has arguments appended to url
        vec.append(0)  # Length of Argument Appended to the Vector
        vec.append(0)  # Number of Variables Appended to the Vector
        vec.append(0)  # Length of largest variable value Appended to the Vector
        vec.append(0)  # Maximum number of Delims Appended to the Vector
    # print('argument length:',0)
    # print('number of arguments:',0)

    return vec
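
Construct_Vector leans on several helpers that the excerpt does not define (Total_Dots, Total_Delims, ip_presence, and others). Plausible reconstructions for the two counting helpers, matching how they are called above; the exact delimiter set is a guess:

import re

def Total_Dots(s):
    # number of '.' characters in the string
    return s.count('.')

def Total_Delims(s):
    # count of common URL delimiter characters (assumed set)
    return len(re.findall(r'[-_?=&;]', s))
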
Example #8
#!/usr/bin/env python3

import sys
from entropy import Entropy

if len(sys.argv) != 3:
    print("Correct usage: python3 run.py </path/to/file> <N>")
else:
    file_path = sys.argv[1]
    n = int(sys.argv[2])

    elements = []
    with open(file_path, 'r') as file:
        for line in file:
            elements.extend(list(line))

    entropy = Entropy(elements)
    # If memory equals N, we need to consider N + 1 elements at a time.
    print(f"Entropy with N = {n}: {entropy.entropy(n + 1)}")
Example #9
def img_analysis(image_path: str):

    counter = Entropy()

    with open(image_path, 'rb+') as f:
        # read the header
        id_length = one_byte(f)
        colour_map_type = one_byte(f)
        image_type = one_byte(f)
        # colour map specification
        first_entry_index = byte_list(f, 2)
        colour_map_length = byte_list(f, 2)
        colour_map_entry_size = ord(f.read(1))
        # image specification
        x_origin = byte_list(f, 2)
        y_origin = byte_list(f, 2)
        image_width = int_from_bytes(byte_list(f, 2))
        image_height = int_from_bytes(byte_list(f, 2))
        pixel_depth = one_byte(f)
        image_descriptor = one_byte(f)

        # create a two-line pixel buffer
        # the pixel on the most left is always black
        # 0 -> top row
        # 1 -> current row
        buffer = [[(0, 0, 0) for _ in range(0, image_width + 1)]
                  for _ in [1, 2]]
        # load the first row of pixels
        for pixel in range(1, image_width + 1):
            # for every pixel load three bytes representing BGR colours
            buffer[0][pixel] = byte_list(f, 3)

        for line in range(0, image_height):
            # take the top row as the current one
            # (swap the row references instead of copying the lists)
            buffer[1], buffer[0] = buffer[0], buffer[1]
            # and load another row on top
            if line != image_height - 1:
                for pixel in range(1, image_width + 1):
                    buffer[0][pixel] = byte_list(f, 3)
            else:
                # if this is the last row, the top row needs to be
                # a row of black pixels
                for pixel in range(1, image_width + 1):
                    buffer[0][pixel] = (0, 0, 0)

            # loop through the loaded pixels
            for i in range(1, image_width + 1):
                pixel = buffer[1][i]
                west = buffer[1][i - 1]
                north = buffer[0][i]
                northwest = buffer[0][i - 1]

                # do all the predictions

                # \hat{X} = (0,0,0)
                counter.register_char('normal', pixel)
                # \hat{X} = W
                hat_x = west
                counter.register_char('W', subtract_pixels(pixel, hat_x))
                # \hat{X} = N
                hat_x = north
                counter.register_char('N', subtract_pixels(pixel, hat_x))
                # \hat{X} = NW
                hat_x = northwest
                counter.register_char('NW', subtract_pixels(pixel, hat_x))
                # \hat{X} = N + W - NW
                hat_x = subtract_pixels(add_pixels(north, west), northwest)
                counter.register_char('N + W - NW',
                                      subtract_pixels(pixel, hat_x))
                # \hat{X} = N + (W - NW)/2
                hat_x = add_pixels(
                    north, scale_pixel(subtract_pixels(west, northwest), 0.5))
                counter.register_char('N + (W - NW)/2',
                                      subtract_pixels(pixel, hat_x))
                # \hat{X} = W + (N - NW)/2
                hat_x = add_pixels(
                    west, scale_pixel(subtract_pixels(north, northwest), 0.5))
                counter.register_char('W + (N - NW)/2',
                                      subtract_pixels(pixel, hat_x))
                # \hat{X} = (N + W)/2
                hat_x = scale_pixel(add_pixels(north, west), 0.5)
                counter.register_char('(N + W)/2',
                                      subtract_pixels(pixel, hat_x))
                # 'new standard': the JPEG-LS median (MED) predictor,
                # computed for every colour channel separately
                hat_x = [0, 0, 0]
                for c in range(0, 3):
                    if northwest[c] >= max(west[c], north[c]):
                        hat_x[c] = min(west[c], north[c])
                    elif northwest[c] <= min(west[c], north[c]):
                        hat_x[c] = max(west[c], north[c])
                    else:
                        hat_x[c] = west[c] + north[c] - northwest[c]
                hat_x = tuple(hat_x)
                counter.register_char('new standard',
                                      subtract_pixels(pixel, hat_x))

    return counter
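
The analysis assumes per-channel pixel arithmetic helpers (subtract_pixels, add_pixels, scale_pixel) that the excerpt does not define. A sketch consistent with how the residuals are registered; the mod-256 wrap-around is an assumption:

def subtract_pixels(a, b):
    # per-channel difference, wrapped into one byte
    return tuple((x - y) % 256 for x, y in zip(a, b))

def add_pixels(a, b):
    return tuple((x + y) % 256 for x, y in zip(a, b))

def scale_pixel(p, factor):
    # per-channel scaling, truncated to an integer byte value
    return tuple(int(x * factor) % 256 for x in p)
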
Example #10
def __init__(self, features_list, labels_list):
    self.features_list = features_list
    self.labels_list = labels_list
    self.data_set_entries_count = features_list.shape[0]
    self.number_of_features = features_list.shape[1] - 1
    self.base_entropy = Entropy(features_list, labels_list).value()
Example #11
def __init__(self, data_set):
    self.data_set = data_set
    self.data_set_entries_count = len(data_set)
    self.number_of_features = len(data_set[0]) - 1
    self.base_entropy = Entropy(data_set).value()