def service_worker_requests_logs(id, log_file):
    """Parse Service Worker events out of a log stream and persist them.

    Args:
        id: Log/visit identifier attached to each parsed event.
        log_file: File-like object yielding log lines, or None/empty.

    Returns:
        List of event dicts; each is also inserted into the DB as a side
        effect before being returned.
    """
    sw_logs = []
    if log_file:
        line = log_file.readline()
        while line:
            if 'Service Worker' in line:
                sw_item = {}
                # Accumulate the fields of one event until its terminator
                # ('||' or '***') line is reached.
                while line:
                    if 'Service Worker' in line:
                        # Timestamp sits between '@' and ']' on the header line.
                        time = line[line.index('@') + 1:line.index(']')]
                        time = datetime.strptime(time, ' %Y-%m-%d %H:%M:%S ')
                        sw_item['timestamp'] = str(time)
                        # BUGFIX: strip('') was a no-op (empty strip set);
                        # strip() actually trims surrounding whitespace.
                        sw_item['info'] = line[:line.index('@')].strip().replace('[', '')
                    if 'Origin' in line:
                        sw_item['sw_url'] = line.split('::')[1]
                    if 'URL ::' in line:
                        sw_item['target_url'] = line.split('::')[1]
                    if '||' in line or '***' in line:
                        if sw_item:
                            if 'sw_url' not in sw_item:
                                sw_item['sw_url'] = ''
                            sw_item['log_id'] = id
                            sw_logs.append(sw_item)
                            dbo = db_operations.DBOperator()
                            # NOTE: 'wroker' [sic] is the DB layer's method name;
                            # renaming it here would break the call.
                            dbo.insert_service_wroker_event(sw_item)
                        break
                    line = log_file.readline()
            line = log_file.readline()
    return sw_logs
def process_resource_file(resources_tar_dir, i, id):
    """Hash each regular file in a resources tar and record contacted URLs.

    Args:
        resources_tar_dir: Path to the resources .tar archive.
        i: Visit/iteration index stored alongside each resource row.
        id: Log identifier for the DB rows.

    Errors are caught and printed so one bad archive does not abort the
    caller's processing loop.
    """
    try:
        with tarfile.open(resources_tar_dir, 'r') as tar:
            for tarinfo in tar:
                if not tarinfo.isreg():
                    continue
                # Archive layout assumed to be <top>/<sub>/<file_name>.
                file_name = tarinfo.name.split('/')[2]
                sha = hashlib.sha1()
                chunks = []
                # BUGFIX: read the member once, feeding both the hash and
                # the URL scan (the old code extracted the member twice).
                flo = tar.extractfile(tarinfo)
                try:
                    while True:
                        data = flo.read(2 ** 20)
                        if not data:
                            break
                        sha.update(data)
                        chunks.append(data)
                finally:
                    flo.close()
                # BUGFIX: extractfile() yields bytes; decode before applying
                # the str regexes (the old code raised TypeError here, which
                # the broad except silently printed).
                text = b''.join(chunks).decode('utf-8', errors='ignore')
                links = re.findall('"((http)s?://.*?)"', text)
                links2 = re.findall("'((http)s?://.*?)'", text)
                links = [l[0] for l in links] + [l[0] for l in links2]
                contacted_urls = ' :: '.join(links)

                dbo = db_operations.DBOperator()
                dbo.insert_resource_info(id, i, file_name,
                                         sha.hexdigest(), contacted_urls)
    except Exception as e:
        print('process resources')
        print(e)
def process_detailed_logs():
    """Walk ./containers_data/, process each visit folder, then archive it.

    For every container folder and visit index 0-15 this extracts the SW and
    Chrome logs, hashes the fetched resources, stores parsed events in the
    DB, and finally moves the folder to ./processed_data/.
    """
    dir_path = './containers_data/'
    processed_dir_path = './processed_data/'

    for i in range(16):
        for file in os.listdir(dir_path):
            id = file.replace('container_', '')
            visit = file + '/' + str(i)
            if not os.path.exists(dir_path + visit):
                continue
            print('Processing folder ' + dir_path + visit)
            sw_log_tar_dir = dir_path + visit + '/logs.tar'
            chrome_tar_dir = dir_path + visit + '/chrome_log.tar'
            resources_tar_dir = dir_path + visit + '/resources.tar'
            log_file = None
            chrome_file = None
            t = None
            t2 = None
            try:
                t = tarfile.open(sw_log_tar_dir, 'r')
                log_name = 'logs/' + id + '_sw.log'
                if log_name in t.getnames():
                    log_file = t.extractfile(log_name)
                t2 = tarfile.open(chrome_tar_dir, 'r')
                chrome_log_name = 'chrome_debug.log'
                if chrome_log_name in t2.getnames():
                    chrome_file = t2.extractfile(chrome_log_name)

                ## Process the files requested by SW
                process_resource_file(resources_tar_dir, i, id)

                ## Process the logs and filter required events
                # BUGFIX: pre-initialise so a parse failure below doesn't
                # raise NameError when the logs are inserted.
                formatted_logs = []
                try:
                    formatted_logs = extract_chain.parse_log(
                        id, i, chrome_file, log_file)
                except Exception as e:
                    print('Extract chain')
                    print(e)

                dbo = db_operations.DBOperator()
                ## Insert the processed events into the DB
                for log in formatted_logs:
                    dbo.insert_logs(i, log)

                ## Find all URLs that were involved in this visit
                parse_results_urls(id, i)

                ## Move the processed data to a different folder
                try:
                    shutil.move(dir_path + visit,
                                processed_dir_path + visit)
                except Exception as e:
                    # BUGFIX: str + Exception raised TypeError in the old code.
                    print('Move file :' + str(e))

            except Exception as e:
                print('Process detailed logs')
                print(e)
            finally:
                # BUGFIX: close the tar handles (they leaked before).
                if t is not None:
                    t.close()
                if t2 is not None:
                    t2.close()
def parse_results_urls(id, count):
    """Read ./event_logs/<id>_<count>.log and store every 'URL ::' entry.

    Args:
        id: Log identifier used for the file name and the DB rows.
        count: Visit index used for the file name.

    Raises:
        OSError: If the event log file does not exist / cannot be opened.
    """
    dir_path = './event_logs/'
    file = id + '_' + str(count) + '.log'
    with open(dir_path + file, 'r') as event_file:
        dbo = db_operations.DBOperator()
        for line in event_file:
            if 'URL ::' in line:
                # BUGFIX: strip() (not strip(' ')) so the trailing newline
                # is not stored as part of the URL.
                url = line.split('::')[1].strip()
                dbo.insert_url(id, url, '', 'other')
 def process_notification(self, f, timestamp):
     """Record a push-notification event parsed from the log stream.

     Parses the entry, normalises its target URL, tags it with this log's
     id and a running notification counter, and appends two human-readable
     rows to self.notification_logs.
     """
     fields = parse_log_entry(f)
     frame_url = fields['frame_url']
     raw_target = fields['push_notification_target_url']
     # Keep only the part starting at the scheme; blank when no http(s) URL.
     fields['push_notification_target_url'] = (
         raw_target[raw_target.index('http'):] if 'http' in raw_target else '')
     fields['timestamp'] = timestamp
     fields['log_id'] = self.log_id
     self.notification_count += 1
     fields['notification_count'] = self.notification_count
     dbo = db_operations.DBOperator()
     #dbo.insert_notification(fields)
     details = [
         str(self.notification_count),
         fields['push_notification_title'],
         fields['push_notification_body'],
         frame_url,
         fields.get('push_notification_tag', ''),
         fields['push_notification_image'],
         fields['push_notification_target_url'],
         fields.get('push_notification_icon', ''),
     ]
     self.notification_logs.append(
         {'timestamp': timestamp, 'message': 'Notification from: ' + frame_url})
     self.notification_logs.append(
         {'timestamp': timestamp,
          'message': 'Notification shown: ' + ' && '.join(details)})
import os

from docker_config import *
from docker_monitor import *
from database import db_operations
from api_calls import api_requests

import logging

# Mirror module activity into output_new.log (truncated on every run).
logging.basicConfig(filename='output_new.log',
                    filemode='w',
                    format='%(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)

# Shared module-level handles: Docker client and DB accessor.
client = docker.from_env()
dbo = db_operations.DBOperator()


def process_urls_parallel(analysis_urls, script_file, container_timeout,
                          max_containers):
    """Fan analysis URLs out to worker processes, max_containers at a time.

    NOTE(review): this definition appears truncated in this chunk — the
    with/while bodies continue past the visible lines, so only the visible
    part is documented here.
    """
    futures = {}
    processed_url_ids = set()
    # Work on a copy so the caller's dict is not drained by pop() below.
    urls = analysis_urls.copy()
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=max_containers) as executor:
        while len(urls) > 0:
            ## Submit jobs to container ##
            for i in range(min(len(urls), max_containers)):
                # NOTE(review): dict.keys() is not subscriptable in Python 3;
                # urls.keys()[0] raises TypeError — likely should be
                # next(iter(urls)). Confirm against the intended runtime.
                id = urls.keys()[0]
                itm = urls.pop(id)
                url = itm['url']
 def process_requests(self, f, timestamp):
     """Parse one request entry from the log stream and persist it."""
     record = parse_log_entry(f)
     record['timestamp'] = timestamp
     record['log_id'] = self.log_id
     db_operations.DBOperator().insert_request(record)