def make_frequency_dict(self, text):
    # Previous inline implementation, now delegated to Entropy:
    # frequency = {}
    # for character in text:
    #     if character not in frequency:
    #         frequency[character] = 0
    #     frequency[character] += 1
    ent = Entropy(self.path)
    self.symbols_count = ent.symbols_count
    return ent.freq
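# A minimal sketch of what the delegated frequency count is assumed to
# produce, equivalent to the commented-out loop above; collections.Counter
# is the idiomatic replacement for the manual dictionary.
from collections import Counter

def make_frequency_dict_sketch(text):
    """Return a {symbol: count} mapping over the characters of text."""
    return dict(Counter(text))

# Example: make_frequency_dict_sketch("abbc") -> {'a': 1, 'b': 2, 'c': 1}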
def GetEntropy(occlusions):
    # Build a binary occupancy matrix: 1 wherever an occlusion was observed.
    I = np.zeros((XSize, YSize))
    for coord in occlusions:
        I[coord.X, coord.Y] = 1
    # Smooth the matrix, then reduce it to an entropy profile.
    entropy = Entropy(I)
    outputMatrix = entropy.MovingWindowFilter(entropy.MovingAverage, 1)
    filteredMatrices = [outputMatrix]
    profile = entropy.Profile(filteredMatrices)
    return profile
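# A minimal sketch of the assumed smoothing step: each cell is replaced by
# the mean of its (2r + 1) x (2r + 1) neighbourhood, reading the "1"
# argument above as the window radius r. This standalone version only
# illustrates the idea; the project's MovingWindowFilter may differ.
import numpy as np

def moving_average_filter(matrix, radius=1):
    padded = np.pad(matrix, radius, mode='edge')
    out = np.empty(matrix.shape, dtype=float)
    for x in range(matrix.shape[0]):
        for y in range(matrix.shape[1]):
            window = padded[x:x + 2 * radius + 1, y:y + 2 * radius + 1]
            out[x, y] = window.mean()
    return out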
def entropy_for_feature(self, feature_number):
    unique_feature_values = [0, 1]
    entropy = 0.0
    for value in unique_feature_values:
        sub_features_list, sub_labels_list = \
            DataSetSplitter(self.features_list, self.labels_list,
                            feature_number, value).new_data_set()
        probability = sub_features_list.shape[0] / float(
            self.data_set_entries_count)
        entropy += probability * Entropy(sub_features_list,
                                         sub_labels_list).value()
    return entropy
def __entropy_for_feature(self, feature_number):
    feature_list = [example[feature_number] for example in self.data_set]
    unique_feature_values = set(feature_list)
    entropy = 0.0
    for value in unique_feature_values:
        sub_data_set = DataSetSplitter(self.data_set, feature_number,
                                       value).new_data_set()
        probability = len(sub_data_set) / float(self.data_set_entries_count)
        entropy += probability * Entropy(sub_data_set).value()
    return entropy
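# Both helpers above compute the same quantity: the entropies of the splits
# induced by one feature, weighted by split probability, i.e. the
# conditional entropy H(labels | feature) used for information gain. A
# minimal sketch of the Shannon entropy that Entropy(...).value() is
# assumed to compute over the class labels of a (sub-)data set:
import math
from collections import Counter

def shannon_entropy(labels):
    """H = -sum(p_i * log2(p_i)) over the label distribution."""
    total = len(labels)
    return -sum((c / total) * math.log2(c / total)
                for c in Counter(labels).values())

# Example: shannon_entropy(['yes', 'yes', 'no', 'no']) == 1.0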
from pox.core import core  # required for core.getLogger() below
from pox.lib.packet.ipv4 import ipv4
from pox.lib.packet.arp import arp
from pox.lib.addresses import IPAddr, EthAddr
from pox.lib.util import str_to_bool, dpid_to_str
from pox.lib.recoco import Timer
import pox.openflow.libopenflow_01 as of
from pox.lib.revent import *
import itertools
import time

from entropy import Entropy

my_dictionary = {}
my_entropy = Entropy()
set_Timer = False
defendDDOS = False

log = core.getLogger()

FLOW_IDLE_TIMEOUT = 10
ARP_TIMEOUT = 60 * 2
MAX_BUFFERED_PER_IP = 5
MAX_BUFFER_TIME = 5


class Entry(object):
    def __init__(self, port, mac):
        self.timeout = time.time() + ARP_TIMEOUT
        self.port = port
        self.mac = mac
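# The my_dictionary / my_entropy pair and the defendDDOS flag suggest the
# classic entropy-based DDoS heuristic: count packets per destination IP
# over a window, then flag an attack when the entropy of that distribution
# drops, meaning traffic is concentrating on a few targets. A minimal
# sketch under that assumption; the function names and the threshold are
# illustrative, not from the source.
import math

def window_entropy(ip_counts):
    total = sum(ip_counts.values())
    return -sum((c / total) * math.log2(c / total)
                for c in ip_counts.values())

def looks_like_ddos(ip_counts, threshold=0.5):
    return window_entropy(ip_counts) < threshold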
def make_frequency_dict(self, text):
    ent = Entropy(self.path)
    ent.HBA()
    return ent.pairs
def Construct_Vector(mystr, conn):
    vec = []
    # strip the protocol from the URL
    removed_protocol = re.sub(r'^http(s*)://', '', mystr)
    vec.append(len(removed_protocol))  # length of URL
    vec.append(Total_Dots(removed_protocol))  # number of dots in URL
    # check for the presence of security-sensitive words in the URL
    for i in Suspicious_Words:
        if re.search(i, removed_protocol, re.IGNORECASE):
            vec.append(1)  # suspicious word present
            break
    else:
        vec.append(0)  # no suspicious word present
    patt = r'^[^/]*'       # pattern to extract the domain from the URL
    patt_path = r'/[^/]*'  # pattern to extract the path of the URL
    dom = re.match(patt, removed_protocol).group(0)
    info = re.findall(patt_path, removed_protocol)
    dom_hyph_count = no_of_hyphens_in_domain(dom)
    vec.append(int(dom_hyph_count))  # number of hyphens in the domain
    domain_tokens = dom.split('.')  # split the domain on periods
    domain_tokens = [x for x in domain_tokens if x != '']  # drop empty tokens
    path_tokens = [re.sub('/', '', x) for x in info]
    if path_tokens != []:
        file_n_args = path_tokens[-1]
    else:
        file_n_args = ''
    path_tokens = path_tokens[:-1]
    info = [x for x in info if x != '']
    slashes = len(info)
    dir_len = 0
    for i in path_tokens:
        dir_len += len(i)
    dir_len += slashes
    vec.append(int(dir_len))  # directory length
    num_subdir = len(path_tokens)
    vec.append(num_subdir)  # number of subdirectories in the URL
    TLD = domain_tokens[-1]  # top-level domain
    vec.append(len(dom))  # domain length
    vec.append(len(domain_tokens))  # domain token count
    vec.append(len(path_tokens))  # path token count
    # does the URL contain an IP address
    has_ip = ip_presence(removed_protocol)
    vec.append(has_ip)  # presence of IP address (yes: 1, no: 0)
    # get the Alexa page rank
    has_alexa_rank = alexa_pagerank(dom, conn)
    vec.append(has_alexa_rank)
    # does the page use SSL
    uses_https = check_https(mystr)
    vec.append(uses_https)
    # get the country code and whether the domain is older than a year
    country_code, dom_age_gt_1year = get_ip_info(dom)
    vec.append(country_code)
    vec.append(dom_age_gt_1year)
    # bag of words for word occurrences
    word = bag_of_words(mystr)
    vec.append(word)
    # entropy of the URL
    ent = Entropy(mystr)
    entropy = ent.H(mystr)
    vec.append(entropy)
    # count of special characters
    characters = special_chars(mystr)
    vec.append(characters)
    domain_tok_lengths = []
    for i in domain_tokens:
        domain_tok_lengths.append(len(i))
    largest_dom_token_len = max(domain_tok_lengths)
    vec.append(largest_dom_token_len)  # largest domain token length
    avg_dom_Tok_len = round(
        (float(sum(domain_tok_lengths)) / len(domain_tok_lengths)), 2)
    vec.append(avg_dom_Tok_len)  # average domain token length
    path_tok_lengths = []
    path_tok_dots = 0
    path_tok_delims = 0
    avg_path_Tok_len = 0
    largest_path_token_len = 0
    if len(path_tokens):
        for i in path_tokens:
            path_tok_lengths.append(len(i))
            path_tok_dots = Total_Dots(i)
            path_tok_delims = Total_Delims(i)
        avg_path_Tok_len = round(
            (float(sum(path_tok_lengths)) / len(path_tok_lengths)), 2)
        largest_path_token_len = max(path_tok_lengths)
        vec.append(largest_path_token_len)  # largest path token length
        vec.append(avg_path_Tok_len)  # average path token length
    else:
        vec.append(largest_path_token_len)  # largest path token length: 0 (no path tokens)
        vec.append(avg_path_Tok_len)  # average path token length: 0 (no path tokens)
    if has_ip:
        vec.append(0)  # IP address present, so no suspicious TLD
    else:
        for i in Suspicious_TLD:
            if re.search(i, TLD, re.IGNORECASE):
                vec.append(1)  # suspicious TLD
                break
        else:
            vec.append(0)  # non-suspicious TLD
    if file_n_args != '':
        # A file name and possibly arguments are present in the URL:
        # POST arguments are everything after the '?', the file part
        # is an item such as index.html.
        tmp = file_n_args.split('?')
        file = tmp[0]
        if len(tmp) > 1:
            args = tmp[1]
        else:
            args = ''
        if not file:
            vec.append(0)
        else:
            vec.append(1)
        vec.append(len(file))  # length of file name
        vec.append(Total_Dots(file))  # dots in file name
        vec.append(Total_Delims(file))  # delimiters in file name
        if args == '':  # no POST arguments present in the URL
            vec.append(0)  # arguments present: no
            vec.append(0)  # argument length
            vec.append(0)  # number of variables
            vec.append(0)  # length of largest variable value
            vec.append(0)  # maximum number of delimiters
        else:  # POST arguments are present in the URL
            vec.append(1)  # arguments present: yes
            vec.append(len(args) + 1)  # argument length
            arb = args.split('&')
            vec.append(len(arb))  # number of arguments
            len_var = []
            max_delim = []
            for i in arb:  # split each POST argument around the '=' sign
                tmp = i.split('=')
                if len(tmp) > 1:
                    len_var.append(len(tmp[1]))
                    max_delim.append(Total_Delims(tmp[0]))
                    max_delim.append(Total_Delims(tmp[1]))
                else:
                    len_var.append(0)
                    max_delim.append(0)
            vec.append(max(len_var))  # length of largest variable value
            max_delim = max(max_delim)
            vec.append(max_delim)  # maximum number of delimiters
    else:
        # Neither a file name nor arguments are present in the URL,
        # so append 0 for each of the corresponding features.
        vec.append(0)  # has file name in URL
        vec.append(0)  # length of file name
        vec.append(0)  # dots in file name
        vec.append(0)  # delimiters in file name
        vec.append(0)  # has arguments appended to URL
        vec.append(0)  # argument length
        vec.append(0)  # number of variables
        vec.append(0)  # length of largest variable value
        vec.append(0)  # maximum number of delimiters
    return vec
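# The entropy feature above comes from Entropy(mystr).H(mystr). A minimal
# sketch of the per-character Shannon entropy such an H is assumed to
# compute; phishing URLs with long random-looking tokens tend to score
# higher than ordinary ones.
import math
from collections import Counter

def url_entropy(url):
    counts = Counter(url)
    total = len(url)
    return -sum((c / total) * math.log2(c / total) for c in counts.values())

# Example: url_entropy('aaaa') == 0.0, while a random token such as
# 'x7k2q9' scores close to log2 of its alphabet size.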
#!/usr/bin/env python3
import sys

from entropy import Entropy

if len(sys.argv) != 3:
    print("Correct usage: python3 run.py </path/to/file> <N>")
else:
    file_path = sys.argv[1]
    n = int(sys.argv[2])
    elements = []
    with open(file_path, 'r') as file:
        for line in file:
            elements.extend(list(line))
    entropy = Entropy(elements)
    # If memory equals N, we need to consider N + 1 elements at a time.
    print(f"Entropy with N = {n}: {entropy.entropy(n + 1)}")
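# A minimal sketch of what entropy(n + 1) is assumed to compute: the
# conditional entropy of the next symbol given the previous n symbols,
# estimated from (n + 1)-gram and n-gram block counts. The function name
# is illustrative; the project's Entropy class may differ in detail.
import math
from collections import Counter

def conditional_entropy(elements, n):
    blocks = Counter(tuple(elements[i:i + n + 1])
                     for i in range(len(elements) - n))
    contexts = Counter(tuple(elements[i:i + n])
                       for i in range(len(elements) - n))
    total = sum(blocks.values())
    # p(block) * log2 p(next symbol | context), summed over all blocks
    return -sum((c / total) * math.log2(c / contexts[b[:-1]])
                for b, c in blocks.items())

# With n = 0 this reduces to the plain Shannon entropy of the symbols.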
def img_analysis(image_path: str):
    counter = Entropy()
    with open(image_path, 'rb+') as f:
        # read the header
        id_length = one_byte(f)
        colour_map_type = one_byte(f)
        image_type = one_byte(f)
        # colour map specification
        first_entry_index = byte_list(f, 2)
        colour_map_length = byte_list(f, 2)
        colour_map_entry_size = ord(f.read(1))
        # image specification
        x_origin = byte_list(f, 2)
        y_origin = byte_list(f, 2)
        image_width = int_from_bytes(byte_list(f, 2))
        image_height = int_from_bytes(byte_list(f, 2))
        pixel_depth = one_byte(f)
        image_descriptor = one_byte(f)

        # create a two-line pixel buffer; the leftmost pixel is always black
        # 0 -> top row
        # 1 -> current row
        buffer = [[(0, 0, 0) for _ in range(0, image_width + 1)]
                  for _ in [1, 2]]
        # load the first row of pixels
        for pixel in range(1, image_width + 1):
            # for every pixel, load three bytes representing BGR colours
            buffer[0][pixel] = byte_list(f, 3)

        for line in range(0, image_height):
            # take the top row as the current one
            # (swap the row references instead of copying them)
            buffer[1], buffer[0] = buffer[0], buffer[1]
            # and load another row on top
            if line != image_height - 1:
                for pixel in range(1, image_width + 1):
                    buffer[0][pixel] = byte_list(f, 3)
            else:
                # if this is the last row, the top row needs to be
                # a row of black pixels
                for pixel in range(1, image_width + 1):
                    buffer[0][pixel] = (0, 0, 0)

            # loop through the loaded pixels
            for i in range(1, image_width + 1):
                pixel = buffer[1][i]
                west = buffer[1][i - 1]
                north = buffer[0][i]
                northwest = buffer[0][i - 1]
                # do all the predictions
                # \hat{X} = (0, 0, 0)
                counter.register_char('normal', pixel)
                # \hat{X} = W
                hat_x = west
                counter.register_char('W', subtract_pixels(pixel, hat_x))
                # \hat{X} = N
                hat_x = north
                counter.register_char('N', subtract_pixels(pixel, hat_x))
                # \hat{X} = NW
                hat_x = northwest
                counter.register_char('NW', subtract_pixels(pixel, hat_x))
                # \hat{X} = N + W - NW
                hat_x = subtract_pixels(add_pixels(north, west), northwest)
                counter.register_char('N + W - NW',
                                      subtract_pixels(pixel, hat_x))
                # \hat{X} = N + (W - NW)/2
                hat_x = add_pixels(
                    north, scale_pixel(subtract_pixels(west, northwest), 0.5))
                counter.register_char('N + (W - NW)/2',
                                      subtract_pixels(pixel, hat_x))
                # \hat{X} = W + (N - NW)/2
                hat_x = add_pixels(
                    west, scale_pixel(subtract_pixels(north, northwest), 0.5))
                counter.register_char('W + (N - NW)/2',
                                      subtract_pixels(pixel, hat_x))
                # \hat{X} = (N + W)/2
                hat_x = scale_pixel(add_pixels(north, west), 0.5)
                counter.register_char('(N + W)/2',
                                      subtract_pixels(pixel, hat_x))
                # new standard
                hat_x = [0, 0, 0]
                # perform the algorithm for every colour channel separately
                for c in range(0, 3):
                    if northwest[c] >= max(west[c], north[c]):
                        hat_x[c] = max(west[c], north[c])
                    elif northwest[c] <= min(west[c], north[c]):
                        hat_x[c] = min(west[c], north[c])
                    else:
                        hat_x[c] = west[c] + north[c] - northwest[c]
                hat_x = tuple(hat_x)
                counter.register_char('new standard',
                                      subtract_pixels(pixel, hat_x))
    return counter
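# The prediction loop relies on pixel helpers that are not shown here. A
# minimal sketch under the assumption that residuals are taken per channel
# modulo 256, so every residual is again a valid byte triple for the
# counter; the real helpers may wrap or clamp differently.
def add_pixels(a, b):
    return tuple((x + y) % 256 for x, y in zip(a, b))

def subtract_pixels(a, b):
    return tuple((x - y) % 256 for x, y in zip(a, b))

def scale_pixel(p, factor):
    return tuple(int(x * factor) % 256 for x in p)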
def __init__(self, features_list, labels_list):
    self.features_list = features_list
    self.labels_list = labels_list
    self.data_set_entries_count = features_list.shape[0]
    self.number_of_features = features_list.shape[1] - 1
    self.base_entropy = Entropy(features_list, labels_list).value()
def __init__(self, data_set):
    self.data_set = data_set
    self.data_set_entries_count = len(data_set)
    self.number_of_features = len(data_set[0]) - 1
    self.base_entropy = Entropy(data_set).value()
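# base_entropy in both constructors is the entropy of the unsplit data set.
# In ID3-style tree induction it is assumed to combine with the
# __entropy_for_feature helpers above as information gain; a minimal sketch
# of a method that would live on the same class (the name is hypothetical):
def best_feature_to_split(self):
    gains = [
        self.base_entropy - self.__entropy_for_feature(feature)
        for feature in range(self.number_of_features)
    ]
    # the feature with the largest information gain becomes the next split
    return gains.index(max(gains))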