import hashlib
import os
import re
import shutil
import tarfile
from datetime import datetime

import extract_chain  # local module providing parse_log(); import path assumed
from database import db_operations


def service_worker_requests_logs(id, log_file):
    """Parse service-worker events out of an open log file handle.

    Each event starts with a '[Service Worker @ <timestamp> ]' line, may carry
    'Origin ::' and 'URL ::' detail lines, and ends with a '||' or '***' line.
    """
    sw_logs = []
    if log_file:
        line = log_file.readline()
        while line:
            if 'Service Worker' in line:
                sw_item = {}
                while line:
                    if 'Service Worker' in line:
                        time = line[line.index('@') + 1:line.index(']')]
                        time = datetime.strptime(time, ' %Y-%m-%d %H:%M:%S ')
                        sw_item['timestamp'] = str(time)
                        # strip() (not the no-op strip('')) so surrounding whitespace goes
                        sw_item['info'] = line[:line.index('@')].strip().replace('[', '')
                    if 'Origin' in line:
                        sw_item['sw_url'] = line.split('::')[1]
                    if 'URL ::' in line:
                        sw_item['target_url'] = line.split('::')[1]
                    if '||' in line or '***' in line:
                        # End-of-event marker: flush the collected item
                        if sw_item:
                            if 'sw_url' not in sw_item:
                                sw_item['sw_url'] = ''
                            sw_item['log_id'] = id
                            sw_logs.append(sw_item)
                            dbo = db_operations.DBOperator()
                            dbo.insert_service_wroker_event(sw_item)
                        break
                    line = log_file.readline()
            line = log_file.readline()
    return sw_logs
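# A minimal sketch of the line layout the parser above expects; the sample line
# is hypothetical, reconstructed from the slicing logic, not from a real run:
def _sw_line_format_demo():
    line = '[Service Worker @ 2021-06-01 10:30:00 ] fetch event'
    ts = datetime.strptime(line[line.index('@') + 1:line.index(']')], ' %Y-%m-%d %H:%M:%S ')
    info = line[:line.index('@')].strip().replace('[', '')
    return ts, info  # (datetime(2021, 6, 1, 10, 30), 'Service Worker')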
def process_resource_file(resources_tar_dir, i, id):
    try:
        ## Calculate the hash of each resource file and collect the URLs
        ## referenced by any script file
        with tarfile.open(resources_tar_dir, 'r') as tar:
            for tarinfo in tar:
                if tarinfo.isreg():
                    # First pass: stream the member in 1 MiB chunks to hash it
                    flo = tar.extractfile(tarinfo)
                    hash = hashlib.sha1()
                    file_name = tarinfo.name.split('/')[2]
                    while True:
                        data = flo.read(2 ** 20)
                        if not data:
                            break
                        hash.update(data)
                    flo.close()
                    # Second pass: scan the body for quoted http(s) URLs;
                    # extractfile() yields bytes, so decode before the str regex
                    flo = tar.extractfile(tarinfo)
                    urls = flo.read().decode('utf-8', errors='ignore')
                    links = re.findall('"((http)s?://.*?)"', urls)
                    links2 = re.findall("'((http)s?://.*?)'", urls)
                    links = [l[0] for l in links] + [l[0] for l in links2]
                    contacted_urls = ' :: '.join(links)
                    flo.close()
                    dbo = db_operations.DBOperator()
                    dbo.insert_resource_info(id, i, file_name, hash.hexdigest(), contacted_urls)
    except Exception as e:
        print('process resources')
        print(e)
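# A small sketch of the quoted-URL extraction above on a hypothetical script
# body; findall() returns (url, scheme-prefix) tuples because of the nested
# capture group, which is why only element [0] of each tuple is kept:
def _url_regex_demo():
    body = 'fetch("https://example.com/api"); importScripts(\'http://cdn.example.org/x.js\')'
    links = re.findall('"((http)s?://.*?)"', body)
    links2 = re.findall("'((http)s?://.*?)'", body)
    return [l[0] for l in links] + [l[0] for l in links2]
    # ['https://example.com/api', 'http://cdn.example.org/x.js']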
def process_detailed_logs():
    dir_path = './containers_data/'
    processed_dir_path = './processed_data/'
    for i in range(16):
        for file in os.listdir(dir_path):
            id = file.replace('container_', '')
            if os.path.exists(dir_path + file + '/' + str(i)):
                print('Processing folder ' + dir_path + file + '/' + str(i))
                sw_log_tar_dir = dir_path + file + '/' + str(i) + '/logs.tar'
                chrome_tar_dir = dir_path + file + '/' + str(i) + '/chrome_log.tar'
                resources_tar_dir = dir_path + file + '/' + str(i) + '/resources.tar'
                log_file = None
                chrome_file = None
                try:
                    t = tarfile.open(sw_log_tar_dir, 'r')
                    log_name = 'logs/' + id + '_sw.log'
                    if log_name in t.getnames():
                        log_file = t.extractfile(log_name)
                    t2 = tarfile.open(chrome_tar_dir, 'r')
                    chrome_log_name = 'chrome_debug.log'
                    if chrome_log_name in t2.getnames():
                        chrome_file = t2.extractfile(chrome_log_name)

                    ## Process the files requested by the service worker
                    process_resource_file(resources_tar_dir, i, id)

                    ## Process the logs and filter the required events;
                    ## default to an empty list so a parse failure does not
                    ## leave formatted_logs unbound below
                    formatted_logs = []
                    try:
                        formatted_logs = extract_chain.parse_log(id, i, chrome_file, log_file)
                    except Exception as e:
                        print('Extract chain')
                        print(e)

                    ## Insert the processed events into the DB
                    dbo = db_operations.DBOperator()
                    for log in formatted_logs:
                        dbo.insert_logs(i, log)

                    ## Find all URLs that were involved in this visit
                    parse_results_urls(id, i)

                    ## Move the processed data to a different folder
                    try:
                        shutil.move(dir_path + file + '/' + str(i),
                                    processed_dir_path + file + '/' + str(i))
                    except Exception as e:
                        print('Move file: ' + str(e))
                except Exception as e:
                    print('Process detailed logs')
                    print(e)
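# On-disk layout assumed by process_detailed_logs(), reconstructed from the
# path strings above (the per-container visit index i runs 0..15):
#
#   containers_data/
#       container_<id>/
#           <i>/
#               logs.tar        -> contains logs/<id>_sw.log
#               chrome_log.tar  -> contains chrome_debug.log
#               resources.tar
#
# Processed visits are moved to processed_data/container_<id>/<i>/.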
def parse_results_urls(id, count):
    dir_path = './event_logs/'
    file = id + '_' + str(count) + '.log'
    with open(dir_path + file, 'r') as event_file:
        line = event_file.readline()
        dbo = db_operations.DBOperator()
        while line:
            if 'URL ::' in line:
                # strip() rather than strip(' ') so the trailing newline
                # does not end up in the stored URL
                url = line.split('::')[1].strip()
                dbo.insert_url(id, url, '', 'other')
            line = event_file.readline()
def process_notification(self, f, timestamp):
    entries = parse_log_entry(f)
    frame_url = entries['frame_url']
    url = entries['push_notification_target_url']
    # Keep only the part of the target starting at the scheme; '' if no URL
    entries['push_notification_target_url'] = url[url.index('http'):] if 'http' in url else ''
    notification_target_url = entries['push_notification_target_url']
    notification_img_url = entries['push_notification_image']
    notification_icon_url = entries['push_notification_icon'] if 'push_notification_icon' in entries else ''
    notification_body = entries['push_notification_body']
    notification_title = entries['push_notification_title']
    notification_tag = entries['push_notification_tag'] if 'push_notification_tag' in entries else ''
    entries['timestamp'] = timestamp
    entries['log_id'] = self.log_id
    self.notification_count += 1
    entries['notification_count'] = self.notification_count
    dbo = db_operations.DBOperator()
    # DB insertion of the raw entry is currently disabled
    #dbo.insert_notification(entries)
    self.notification_logs.append({'timestamp': timestamp,
                                   'message': 'Notification from: ' + frame_url})
    self.notification_logs.append({'timestamp': timestamp,
                                   'message': 'Notification shown: ' + ' && '.join([
                                       str(self.notification_count), notification_title,
                                       notification_body, frame_url, notification_tag,
                                       notification_img_url, notification_target_url,
                                       notification_icon_url])})
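# A minimal sketch (hypothetical value) of the target-URL trimming above:
# any prefix before the first 'http' is dropped, and entries without a URL
# collapse to the empty string.
def _notification_target_demo():
    raw = 'opened via click :: https://example.com/landing'
    return raw[raw.index('http'):] if 'http' in raw else ''
    # 'https://example.com/landing'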
import os
import concurrent.futures
import logging

import docker

from docker_config import *
from docker_monitor import *
from database import db_operations
from api_calls import api_requests

logging.basicConfig(filename='output_new.log', filemode='w',
                    format='%(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)

client = docker.from_env()
dbo = db_operations.DBOperator()


def process_urls_parallel(analysis_urls, script_file, container_timeout, max_containers):
    futures = {}
    processed_url_ids = set()
    urls = analysis_urls.copy()
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_containers) as executor:
        while len(urls) > 0:
            ## Submit jobs to containers ##
            for i in range(min(len(urls), max_containers)):
                # dict views are not indexable in Python 3, so take the first
                # key via an iterator instead of urls.keys()[0]
                id = next(iter(urls))
                itm = urls.pop(id)
                url = itm['url']
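# The loop body above is cut off in this listing. Below is a minimal,
# self-contained sketch of the submit/collect pattern it appears to implement,
# with a hypothetical run_container_sketch() worker standing in for the real
# per-URL container job; none of these names come from the original code.
def run_container_sketch(url_id, url):
    # Stand-in for the real worker, which would start a Docker container,
    # visit the URL, and archive the container's logs.
    return url_id


def process_urls_parallel_sketch(urls, max_containers=4):
    futures = {}
    processed_url_ids = set()
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_containers) as executor:
        while urls:
            for _ in range(min(len(urls), max_containers)):
                url_id = next(iter(urls))
                itm = urls.pop(url_id)
                futures[executor.submit(run_container_sketch, url_id, itm['url'])] = url_id
            # Block until at least one job finishes before submitting more
            done, _ = concurrent.futures.wait(futures,
                                              return_when=concurrent.futures.FIRST_COMPLETED)
            for fut in done:
                processed_url_ids.add(futures.pop(fut))
        # Drain any jobs still in flight once the URL queue is empty
        for fut in concurrent.futures.as_completed(list(futures)):
            processed_url_ids.add(futures.pop(fut))
    return processed_url_ids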
def process_requests(self, f, timestamp):
    entries = parse_log_entry(f)
    entries['timestamp'] = timestamp
    entries['log_id'] = self.log_id
    dbo = db_operations.DBOperator()
    dbo.insert_request(entries)