class SigDetect(object):
    """SigDetect class."""

    def __init__(self, debug=False, path=None):
        """
        Initialize SigDetect.

        Args:
            debug (bool): Log on terminal or not
            path (str): Path of the directory to scan files for

        Raises:
            None

        Returns:
            None
        """
        # Initialize logger
        self.logger = DefaceLogger(__name__, debug=debug)
        # Initialize path of directory to look for
        self._PATH = path
        self._SIG_PATH = os.path.abspath(
            os.path.dirname(__file__)) + '/config/signatures.txt'

    def scan_files(self, files_list):
        """
        Scan the files in the directory to check for attack signatures.

        Args:
            files_list (list): List of files to scan

        Raises:
            None

        Returns:
            defacement_status (dict): Mapping of file path to defacement status
        """
        defacement_status = {}

        with open(self._SIG_PATH) as sign_file:
            signatures = sign_file.readlines()
        signatures = [x.strip() for x in signatures]

        for file in files_list:
            try:
                with open(file, "r") as rf:
                    file_content = rf.read()
                # Flag the file if any of the signatures is found in its content
                defacement_status[file] = any(
                    sign in file_content for sign in signatures)
            except FileNotFoundError:
                pass
            except Exception as e:
                self.logger.log('Error occurred: ' + str(e), logtype='error')

        # Return file path to defacement status dictionary
        return defacement_status
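# A minimal, self-contained sketch of the signature-scan technique that
# SigDetect.scan_files implements above: load a newline-separated signature
# list and flag any file whose contents contain one of the signatures. The
# demo_signature_scan name and the paths in the usage comment are
# hypothetical, purely for illustration.
def demo_signature_scan(sig_path, files_list):
    """Return a dict mapping each file path to True if a signature matches."""
    with open(sig_path) as sign_file:
        signatures = [line.strip() for line in sign_file if line.strip()]
    status = {}
    for file_path in files_list:
        try:
            with open(file_path, "r", errors="ignore") as rf:
                content = rf.read()
            status[file_path] = any(sig in content for sig in signatures)
        except OSError:
            pass  # skip unreadable or missing files
    return status


# Example usage (hypothetical paths):
# print(demo_signature_scan("signatures.txt", ["site/index.html"]))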
class GatherFile(object):
    """GatherFile class."""

    def __init__(self, debug=False, path=None):
        """
        Initialize GatherFile.

        Args:
            debug (bool): Log on terminal or not
            path (str): Path of the directory to scan files for

        Raises:
            None

        Returns:
            None
        """
        # Initialize logger
        self.logger = DefaceLogger(__name__, debug=debug)
        # Initialize path of directory to look for
        self._PATH = path

    def scan_dir(self):
        """
        Scan directory to get the list of files.

        Args:
            None

        Raises:
            None

        Returns:
            found_files (list): List of files after scanning
        """
        found_files = []  # Initialize empty list of found files
        try:
            # Iterate through the directory
            for root, _, files in os.walk(self._PATH):
                for file in files:
                    found_files.append(os.path.join(root, file))
        except Exception as e:
            self.logger.log("Error occurred: " + str(e), logtype="error")
        # Return the list of found files
        return found_files
def __init__(self, debug=False):
    """
    Initialize the dataset paths.

    The dataset files contain text from defaced web pages and
    text from normal web pages.

    Args:
        debug (bool): Log on terminal or not

    Raises:
        None

    Returns:
        None
    """
    # Initialize logger
    self.logger = DefaceLogger(__name__, debug=debug)
    # Initialize paths of the datasets
    self.NORMAL_DATA_PATH = str(Path(
        os.path.dirname(__file__)).parent) + "/web_deface/config/data1.csv"
    self.DEFACED_DATA_PATH = str(Path(
        os.path.dirname(__file__)).parent) + "/web_deface/config/data.csv"
    self.prediction()
def __init__(self, debug=False, path=None, server_name=None):
    """
    Initialize WebDeface.

    Args:
        debug (bool): Log on terminal or not
        path (str): Path of the directory to monitor
        server_name (str): Name of the server (apache/nginx/etc.)

    Raises:
        None

    Returns:
        None
    """
    # Initialize logger
    self.logger = DefaceLogger(
        __name__,
        debug=debug
    )

    if check_root():  # if running as root
        self.logger.log(
            "Initializing SecureTea Web Deface Detection",
            logtype="info"
        )
        # Create Engine object
        self.engine_obj = Engine(debug=debug,
                                 path=path,
                                 server_name=server_name)
    else:
        self.logger.log(
            "Please run as root, exiting.",
            logtype="error"
        )
        sys.exit(0)
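# The check_root helper used above is not defined in this listing. A minimal
# sketch of such a helper, assuming it simply tests the effective UID on a
# POSIX system (the real implementation may differ):
import os


def check_root():
    """Return True if the current process is running as root (EUID 0)."""
    return os.geteuid() == 0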
class BackUp(object):
    """BackUp class."""

    def __init__(self, debug=False):
        """Initialize BackUp.

        Args:
            debug (bool): Log on terminal or not

        Raises:
            None

        Returns:
            None
        """
        # Initialize logger
        self.logger = DefaceLogger(__name__, debug=debug)
        # Cache / back-up directory path
        self._CACHE_DIR = "/etc/securetea/web_deface/cache_dir"
        # Original path to back-up file path mapping
        self.back_up_mapping = dict()
        # List of file names already mapped
        self.file_names = []

    def check_dir(self, path):
        """
        Check whether the directory exists or not.
        If the directory does not exist, create one.

        Args:
            path (str): Path of the directory

        Raises:
            None

        Returns:
            None
        """
        try:
            if not os.path.isdir(path):
                Path(path).mkdir()
        except FileExistsError:
            os.remove(path)  # remove file to create directory
            self.check_dir(path)  # recursively check for any other file
        except FileNotFoundError:
            # Create the parent directory recursively, then retry
            new_path = "/".join(path.split("/")[:-1])
            self.check_dir(path=new_path)
            self.check_dir(path)

    def gen_backup(self, files_list):
        """
        Generate backup / cache of the files.

        Args:
            files_list (list): List of file paths to back up

        Raises:
            None

        Returns:
            back_up_mapping (dict): Original path to back-up path mapping
        """
        # Check whether the cache directory exists or not
        self.check_dir(self._CACHE_DIR)

        for file in files_list:
            file_name = self.get_file_name(file)
            new_path = self._CACHE_DIR + "/" + file_name
            # Check if the sub-directory exists or not; if not, create one
            self.check_dir("/".join(new_path.split("/")[:-1]))
            msg = "Generating backup, copying: " + file + " to: " + new_path
            self.logger.log(msg, logtype="info")
            copy(file, new_path)
            msg = "Copied: " + file + " to: " + new_path
            self.logger.log(msg, logtype="info")
            # Update original path to back-up path mapping dict
            self.back_up_mapping[file] = new_path

        # Return original path to back-up path mapping dict
        return self.back_up_mapping

    def get_file_name(self, file_path, index=-1):
        """
        Recursively extract the name of the file from the path.

        Args:
            file_path (str): Path of the file
            index (int): Index of the file name

        Raises:
            None

        Returns:
            file_name (str): Name of the file
        """
        file_name = file_path.split("/")[index:]
        file_name = "/".join(file_name)
        if file_name not in self.file_names:
            self.file_names.append(file_name)
            return file_name.strip("/")
        else:
            return self.get_file_name(file_path, index=index - 1)
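# A condensed, self-contained sketch of the back-up technique that
# BackUp.gen_backup implements: mirror each file into a cache directory and
# remember the original-to-backup mapping. os.makedirs(..., exist_ok=True)
# stands in for the recursive check_dir logic, the cache directory path is an
# assumption, and name collisions are ignored here for brevity.
import os
from shutil import copy


def backup_files(files_list, cache_dir="/tmp/web_deface_cache"):
    """Copy each file into cache_dir and return an {original: backup} mapping."""
    mapping = {}
    for file_path in files_list:
        backup_path = os.path.join(cache_dir, os.path.basename(file_path))
        os.makedirs(os.path.dirname(backup_path), exist_ok=True)
        copy(file_path, backup_path)
        mapping[file_path] = backup_path
    return mapping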
class DefaceDetect(object):
    """ML based defacement detector."""

    def __init__(self, debug=False, path=None):
        """
        Initialize DefaceDetect.

        Args:
            debug (bool): Log on terminal or not
            path (str): Path of the directory to scan files for

        Raises:
            None

        Returns:
            None
        """
        # Initialize logger
        self.logger = DefaceLogger(
            __name__,
            debug=debug
        )
        # Initialize path of directory to look for
        self._PATH = path
        self._DATASET = str(Path(os.path.dirname(__file__)).parent) + \
            "/web_deface/config/dataset.csv"

    def ml_based_scan(self, files_list):
        """
        Scan the files in the directory to detect any traces of
        defacement attempts.

        Args:
            files_list (list): List of files in the directory to scan

        Raises:
            None

        Returns:
            dict: Mapping of file path to defacement prediction (bool)
        """
        filename = str(Path(os.path.dirname(__file__)).parent) + \
            "/web_deface/config/finalized_model.sav"
        with open(filename, "rb") as f:
            loaded_model = pickle.load(f)

        # Prepare the user web page dataset for prediction
        h = html2text.HTML2Text()
        h.ignore_links = True
        fields = ["status", "content"]

        with open(self._DATASET, 'w') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(fields)
            for file in files_list:
                try:
                    with open(file, 'r') as rf:
                        code = rf.read()
                    code = h.handle(code)
                    code = ' '.join(code.split('\n'))
                    row = ['true', code]
                    csvwriter.writerow(row)
                except Exception:
                    msg = "File path unknown: " + file
                    self.logger.log(
                        msg,
                        logtype="info"
                    )
                    return {}

        df = pd.read_csv(
            self._DATASET,
            usecols=fields,
            nrows=5000
        )
        stemmer = SnowballStemmer('english')
        df['content'] = df['content'].apply(
            lambda x: ' '.join(stemmer.stem(y) for y in x.split())
        )
        df['content'] = df['content'].apply(
            lambda x: ' '.join(word for word in x.split()
                               if word not in (stopwords.words()))
        )
        df = df['content'].copy()
        df = df.str.replace(r'\d+', '', regex=True)
        tfidf = TfidfVectorizer(min_df=2, max_df=0.5, ngram_range=(1, 3))
        features = tfidf.fit_transform(df)
        df = pd.DataFrame(
            features.todense(),
            columns=tfidf.get_feature_names()
        )
        # Re-index to the feature columns of the model's training dataframe
        df_model = pd.read_csv(str(Path(os.path.dirname(__file__)).parent) +
                               "/web_deface/config/df.csv", index_col=0)
        df = df.reindex(labels=df_model.columns, axis=1)
        df['Target'] = '1'
        df = df.fillna(0)
        x = df.drop('Target', axis=1)
        pred = loaded_model.predict(x)
        return {files_list[i]: pred[i] == '1' for i in range(len(pred))}
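# DefaceDetect.ml_based_scan re-vectorizes page text with TF-IDF and feeds the
# features to a pickled classifier. A tiny end-to-end sketch of that idea,
# using made-up toy data and a freshly trained LogisticRegression instead of
# the project's dataset.csv / df.csv / finalized_model.sav artifacts:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

train_texts = ["hacked by some crew", "welcome to our homepage",
               "owned defaced by attacker", "contact us for support"]
train_labels = [1, 0, 1, 0]  # toy labels: 1 = defaced, 0 = normal

vectorizer = TfidfVectorizer(ngram_range=(1, 2))
features = vectorizer.fit_transform(train_texts)

model = LogisticRegression()
model.fit(features, train_labels)

# Predict on new page text using the *same* fitted vectorizer; aligning the
# feature columns is the role the df.csv reindex step plays in ml_based_scan.
new_pages = ["site hacked by crew", "welcome to the support page"]
predictions = model.predict(vectorizer.transform(new_pages))
print(dict(zip(new_pages, predictions)))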
class Engine(object):
    """
    Web Deface Detection Engine class.
    """

    def __init__(self, debug=False, path=None, server_name=None):
        """
        Initialize Engine.

        Args:
            debug (bool): Log on terminal or not
            path (str): Path of the directory to monitor
            server_name (str): Name of the server (apache/nginx/etc.)

        Raises:
            None

        Returns:
            None
        """
        # Initialize debug
        self.debug = debug
        # Initialize logger
        self.logger = DefaceLogger(__name__, debug=self.debug)

        # At least the path or the server name is needed
        if ((path is None and server_name is None) or
                (path == "" and server_name == "")):
            msg = "Please specify either the path of web server files " + \
                  "or the name of the web server, exiting."
            self.logger.log(msg, logtype="error")
            sys.exit(0)

        # OS to server file map path
        self._MAP_PATH = "/etc/securetea/web_deface/path_map.json"
        # Server file backup map path
        self._BACKUP_MAP = "/etc/securetea/web_deface/backup.json"
        # Server file hash map path
        self._HASH_MAP = "/etc/securetea/web_deface/hash.json"
        # Server file set map path
        self._SET_MAP = "/etc/securetea/web_deface/set.json"

        # Load the path map JSON configuration
        self.path_mapping_dict = json_to_dict(self._MAP_PATH)
        # Categorize OS
        self.os_name = categorize_os()
        # Initialize directory path as None
        self._PATH = None

        try:
            if path is not None and path != "":
                self._PATH = str(path)
            else:
                if self.os_name:
                    self._PATH = self.path_mapping_dict[
                        self.os_name][server_name]
                else:
                    self.logger.log("Could not determine the OS, exiting.",
                                    logtype="error")
                    sys.exit(0)
        except KeyError:
            self.logger.log(
                "Could not decide the suitable web server files path, exiting.",
                logtype="error")
            sys.exit(0)
        except Exception as e:
            self.logger.log("Error occurred: " + str(e), logtype="error")

        if self._PATH:  # if directory path is valid
            # Gather files (create a list of files in the directory)
            self.gather_file_obj = gather_file.GatherFile(debug=self.debug,
                                                          path=self._PATH)
            # Create Hash object
            self.hash_gen_obj = hash_gen.Hash(debug=self.debug)
            # Create BackUp object
            self.backup_obj = BackUp(debug=self.debug)

    def start(self):
        """
        Start SecureTea Web Deface Detection.

        Args:
            None

        Raises:
            None

        Returns:
            None
        """
        msg = "SecureTea Web Deface Detection started, monitoring files: " + self._PATH
        self.logger.log(msg, logtype="info")

        # Scan the directory for files and return the list of files
        files_list = self.gather_file_obj.scan_dir()
        # Find SHA256 hash values for the files and return a dict mapping of files to hash values
        hash_dict = self.hash_gen_obj.hash_value(files_list)
        # Find set values for the files and return a dict mapping of files to sets
        set_dict = self.hash_gen_obj.get_sets(files_list)
        # Back up the files and return a dict mapping of original to back-up path
        backup_dict = self.backup_obj.gen_backup(files_list)

        # Dump back-up mapping dict to JSON
        dump_dict_to_json(path=self._BACKUP_MAP, py_dict=backup_dict)
        # Dump hash mapping dict to JSON
        dump_dict_to_json(path=self._HASH_MAP, py_dict=hash_dict)
        # Dump set mapping dict to JSON
        dump_dict_to_json(path=self._SET_MAP, py_dict=set_dict)

        # Create Monitor object
        self.monitor = Monitor(debug=self.debug,
                               path=self._PATH,
                               hash_path=self._HASH_MAP,
                               set_path=self._SET_MAP,
                               backup_path=self._BACKUP_MAP)

        while True:  # run in an endless monitor loop
            # Start the monitoring process
            self.monitor.monitor()
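# Engine relies on json_to_dict and dump_dict_to_json helpers that are not
# shown in this listing. One plausible implementation, assuming they are thin
# wrappers around the standard json module (the real helpers may differ):
import json


def json_to_dict(path):
    """Load a JSON file into a Python dict."""
    with open(path, "r") as jf:
        return json.load(jf)


def dump_dict_to_json(path, py_dict):
    """Write a Python dict to a JSON file."""
    with open(path, "w") as jf:
        json.dump(py_dict, jf)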
class Hash(object):
    """Hash class."""

    def __init__(self, debug=False):
        """Initialize Hash class.

        Args:
            debug (bool): Log on terminal or not

        Raises:
            None

        Returns:
            None
        """
        # Initialize logger
        self.logger = DefaceLogger(
            __name__,
            debug=debug
        )

    @staticmethod
    def extractBytes(file_path):
        """
        Extract and return the bytes of the file described by the file path.

        Args:
            file_path (str): Path to the file

        Returns:
            bytes: Raw bytes of the file

        Raises:
            None
        """
        with open(file_path, "rb") as rf:
            return rf.read()

    @staticmethod
    def extractFileContent(file_path):
        """
        Extract and return the contents of the file.

        Args:
            file_path (str): Path to the file

        Returns:
            str: File data

        Raises:
            None
        """
        with open(file_path, "r") as rf:
            return rf.read()

    def hash_value(self, files_list):
        """
        Calculate the SHA256 hash value of each file in the list.

        Args:
            files_list (list): A list of files

        Returns:
            hash_dict (dict): Mapping of file path to SHA256 hash value

        Raises:
            None
        """
        # Initialize empty path to hash value dictionary
        hash_dict = dict()

        for file_path in files_list:
            try:
                extracted_bytes = self.extractBytes(file_path)
                hash_value = hashlib.sha256(extracted_bytes).hexdigest()
                hash_dict[file_path] = hash_value
            except FileNotFoundError:
                pass
            except Exception as e:
                self.logger.log(
                    "Error occurred: " + str(e),
                    logtype="error"
                )

        # Return path to hash value dictionary
        return hash_dict

    def get_sets(self, files_list):
        """
        Get the set of tokens that each file contains.

        Args:
            files_list (list): A list of files

        Returns:
            set_dict (dict): Mapping of file path to list of unique tokens

        Raises:
            None
        """
        # Initialize empty path to set value dictionary
        set_dict = dict()

        for file_path in files_list:
            try:
                extracted_content = self.extractFileContent(file_path)
                # Convert to list so that it is JSON serializable
                set_content = list(set(extracted_content.split()))
                set_dict[file_path] = set_content
            except FileNotFoundError:
                pass
            except Exception as e:
                self.logger.log(
                    "Error occurred: " + str(e),
                    logtype="error"
                )

        # Return path to set value dictionary
        return set_dict
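# A compact, self-contained illustration of the hashing step Hash.hash_value
# performs: read each file as bytes and take the SHA256 hex digest. The paths
# in the usage comment are hypothetical.
import hashlib


def sha256_of_files(files_list):
    """Return a dict mapping each file path to its SHA256 hex digest."""
    digests = {}
    for file_path in files_list:
        try:
            with open(file_path, "rb") as rf:
                digests[file_path] = hashlib.sha256(rf.read()).hexdigest()
        except OSError:
            pass  # skip missing or unreadable files
    return digests


# Example usage (hypothetical paths):
# print(sha256_of_files(["/var/www/html/index.html"]))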
class Monitor(object):
    """Monitor class."""

    def __init__(self, debug=False, path=None, hash_path=None,
                 set_path=None, backup_path=None):
        """
        Initialize Monitor class.

        Args:
            debug (bool): Log on terminal or not
            path (str): Path of the directory to monitor
            hash_path (str): Path of the original hash mapping of files (JSON config)
            set_path (str): Path of the original set mapping of files (JSON config)
            backup_path (str): Path of the backup mapping of files (JSON config)

        Raises:
            None

        Returns:
            None
        """
        # Initialize debug
        self.debug = debug
        # Initialize logger
        self.logger = DefaceLogger(__name__, debug=self.debug)
        # Create GatherFile object to gather list of files
        self.gather_file_obj = GatherFile(debug=self.debug, path=path)
        # Create SigDetect object to scan files for attack signatures
        self.sig_detect_obj = SigDetect(debug=self.debug, path=path)
        # Create DefaceDetect object to scan files for defacement attacks using ML
        self.ml_deface_obj = DefaceDetect(debug=self.debug, path=path)
        # Create Hash object to get hashes of the files
        self.hash_gen_obj = Hash(debug=self.debug)
        # Load original hash config of files
        self.cache_hash = json_to_dict(hash_path)
        # Load original set config of files
        self.cache_set = json_to_dict(set_path)
        # Load backup mapping config
        self.back_up_dict = json_to_dict(backup_path)

    def copy_file(self, orig_path):
        """
        Copy file from the backup location to the original location
        using the backup mapping config.

        Args:
            orig_path (str): Path of the original file

        Raises:
            None

        Returns:
            None
        """
        shutil.copy(self.back_up_dict[orig_path], orig_path)

    def monitor(self):
        """
        Start the monitoring process to detect web deface.

        Look for the following:
        1. File addition
        2. File deletion
        3. File modification

        Args:
            None

        Raises:
            None

        Returns:
            None
        """
        # Gather the list of files
        file_list = self.gather_file_obj.scan_dir()
        # Get the hash values of the files
        hash_dict = self.hash_gen_obj.hash_value(file_list)
        # Get the set values of the files
        set_dict = self.hash_gen_obj.get_sets(file_list)
        # Get the defacement status of the files
        deface_status_dict = self.sig_detect_obj.scan_files(file_list)
        # Get the defacement predictions of the files
        ml_deface_prediction = self.ml_deface_obj.ml_based_scan(file_list)

        # Iterate through the hash values
        for path, hash_val in hash_dict.items():
            if self.cache_hash.get(path):  # if file exists in cache hash mapping
                if self.cache_hash[path] != hash_val:  # check if they are equal or not
                    set1 = set(self.cache_set[path])
                    set2 = set(set_dict[path])
                    changed_content = ' '.join(
                        list((set1 - set2).union(set2 - set1)))
                    changed_content_msg = "File: " + path + \
                        " changed, modified content includes: " + changed_content
                    self.logger.log(changed_content_msg, logtype="warning")
                    msg = "Web Deface detected, attempt to modify file: " + path
                    self.logger.log(msg, logtype="warning")
                    # Hash value not equal, file modified, restore the file
                    self.copy_file(path)
            else:  # hash value does not exist in cache, new file added
                msg = "Web Deface detected, attempt to add new file: " + path
                self.logger.log(msg, logtype="warning")
                try:
                    os.remove(path)  # remove the file
                except FileNotFoundError:
                    pass
                except Exception as e:
                    self.logger.log("Error occurred: " + str(e),
                                    logtype="error")

        # Iterate through the cache hash to look for deletion
        for path, hash_val in self.cache_hash.items():
            if not hash_dict.get(path):  # if hash not in new hash, file deleted
                msg = "Web Deface detected, attempt to delete file: " + path
                self.logger.log(msg, logtype="warning")
                self.copy_file(path)  # copy the deleted file from the backup

        # Iterate through the file contents to look for attack signatures
        for path, defacement_status in deface_status_dict.items():
            if defacement_status:
                msg = "Web Deface detected, attack signature found in file: " + path
                self.logger.log(msg, logtype="warning")
                # Attack signature found, restore the file from the backup
                self.copy_file(path)

        # Iterate through the ML predictions to look for defacement
        for path, defacement_status in ml_deface_prediction.items():
            if defacement_status:
                msg = "Web Deface detected, ML based defacement prediction model detects attack on file: " + path
                self.logger.log(msg, logtype="warning")
                # Defacement detected, restore the file from the backup
                self.copy_file(path)
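# The core decision logic in Monitor.monitor reduces to comparing a cached
# hash mapping against a freshly computed one. A minimal sketch of that
# comparison, independent of the logging and restore machinery above:
def diff_hashes(cached, current):
    """Classify paths as modified, added, or deleted between two hash dicts."""
    modified = [p for p in current if p in cached and cached[p] != current[p]]
    added = [p for p in current if p not in cached]
    deleted = [p for p in cached if p not in current]
    return modified, added, deleted


# Example with toy hash values:
# diff_hashes({"a.html": "h1", "b.html": "h2"}, {"a.html": "h1x", "c.html": "h3"})
# -> (["a.html"], ["c.html"], ["b.html"])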