def process_urls_with_notifications():

    URL_COUNT = 300
    notification_urls = fetch_urls_with_notifications(URL_COUNT)

    while True:
        processed_ids = process_urls_parallel(notification_urls,
                                              notification_collection_script,
                                              ANALYSIS_TIMEOUT,
                                              ANALYSIS_MAX_CONTAINERS)

        for id in processed_ids:
            api_requests.update_url_api(id, 'is_analyzed_desktop', 'true')
            api_requests.update_url_api(id, 'visit_status', '111')

        logging.info(processed_ids)
        ## Retain only those containers that requested notifications
        notification_urls = {
            id: info
            for id, info in notification_urls.items()
            if info['count'] > 0 or id in processed_ids
        }
        for key in list(notification_urls.keys()):
            itm = notification_urls[key]
            if itm['count'] == 15:
                ## Resume each container a maximum of 15 times
                notification_urls.pop(key)
            else:
                itm['count'] = itm['count'] + 1

        notification_urls.update(
            fetch_urls_with_notifications(URL_COUNT - len(notification_urls)))
        logging.info(notification_urls)
        time.sleep(1800)
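
## The loop above drops a URL once its resume counter reaches 15. A minimal
## standalone sketch of that bookkeeping, using hypothetical entries in the
## same {'url': ..., 'count': ...} shape (illustration only, not crawler data):
def _prune_resume_counts_example():
    notification_urls = {
        '42': {'url': 'http://example.com', 'count': 15},  # hit the cap, dropped
        '43': {'url': 'http://example.org', 'count': 3},   # kept, counter bumped
    }
    for key in list(notification_urls.keys()):
        itm = notification_urls[key]
        if itm['count'] == 15:
            notification_urls.pop(key)
        else:
            itm['count'] += 1
    print(notification_urls)  # {'43': {'url': 'http://example.org', 'count': 4}}
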
def fetch_urls_for_crawling():
    results = api_requests.fetch_urls_api(180, 'false', 'false')
    crawl_urls = {}
    for item in results:
        id = item[0]
        url = item[1]
        crawl_urls[id] = url
        api_requests.update_url_api(id, 'visit_status', '-1')
    return crawl_urls
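
## A hedged sketch of how one might wire this function's output into
## process_urls_parallel; the script name, timeout, and container cap below
## are placeholders, not values taken from this codebase:
def _crawl_batch_example():
    crawl_urls = fetch_urls_for_crawling()  # {id: url, ...}
    processed = process_urls_parallel(crawl_urls,
                                      'crawl_collection_script',  # placeholder script
                                      300,                        # placeholder timeout (s)
                                      10)                         # placeholder max containers
    print(processed)
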
def process_urls_parallel(analysis_urls, script_file, container_timeout,
                          max_containers):
    futures = {}
    processed_url_ids = []
    urls = analysis_urls.copy()

    with concurrent.futures.ThreadPoolExecutor(
            max_workers=max_containers) as executor:
        while len(urls) > 0:
            ## Submit jobs to container ##
            for i in range(min(len(urls), max_containers)):
                ## dict_keys is not indexable; take the first remaining id
                id = next(iter(urls))
                url = urls.pop(id)
                futures[executor.submit(initiate_container, url, str(id),
                                        script_file, 0,
                                        container_timeout)] = str(id)
            res_futures = concurrent.futures.wait(
                futures,
                timeout=container_timeout,
                return_when=concurrent.futures.ALL_COMPLETED)

            for future in res_futures[0]:
                id = futures.pop(future)
                res = -1
                try:
                    res = future.result(timeout=container_timeout)
                except Exception as exc:
                    print(get_time() + 'Container_' + str(id) + ': Exception ')
                    print(exc)

                res = export_log(id)
                if res > 0:
                    print(get_time() + 'Container_' + str(id) +
                          ': URL Visited successfully!!')
                    api_requests.update_url_api(id, 'is_visited', 'true')
                    api_requests.update_url_api(id, 'visit_status', '1')
                    processed_url_ids.append(id)
                elif res == -99:
                    print(get_time() + 'Container_' + str(id) +
                          ': Chromium Crashed!!')
                    api_requests.update_url_api(id, 'visit_status', '3')
                else:
                    print(get_time() + 'Container_' + str(id) +
                          ': URL Visit failed!!')
                    api_requests.update_url_api(id, 'visit_status', '2')

            for future in res_futures[1]:
                id = futures.pop(future)
                print(get_time() + 'Container_' + str(id) +
                      ': Timeout occurred!!')
                stop_container(id)
                export_log(id)
                api_requests.update_url_api(id, 'is_visited', 'false')

    return processed_url_ids
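
## The done/not-done split above is the standard concurrent.futures.wait
## pattern. A self-contained sketch with a dummy worker (all names here are
## illustrative, not part of the crawler) showing the same structure:
def _wait_pattern_example():
    import concurrent.futures
    import time

    def dummy_visit(url, delay):
        ## Stand-in for initiate_container: pretend to visit a URL
        time.sleep(delay)
        return 'visited ' + url

    futures = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        futures[executor.submit(dummy_visit, 'http://a.example', 1)] = 'a'
        futures[executor.submit(dummy_visit, 'http://b.example', 10)] = 'b'

        done, not_done = concurrent.futures.wait(
            futures, timeout=3,
            return_when=concurrent.futures.ALL_COMPLETED)

        for future in done:
            print(futures[future], future.result())  # finished within the timeout
        for future in not_done:
            print(futures[future], 'timed out')      # would be stopped and logged
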
def fetch_urls_with_notifications(count):
    if count > 0:
        logging.info('Fetching URLS ::' + str(count))
        results = api_requests.fetch_urls_api(count, 'true', 'true')
        #results = dbo.get_seed_urls()
        crawl_urls = {}
        for item in results:
            id = item[0]
            url = item[1]
            crawl_urls[str(id)] = {'url': url, 'count': 0}
            api_requests.update_url_api(id, 'visit_status', '11')
        return crawl_urls
    return {}
def check_permission_request():
	for file in os.listdir(dir_path):
		print('Processing ' + file)
		try:
			log_tar_dir = dir_path + file + '/chrome_log.tar'
			t = tarfile.open(log_tar_dir, 'r')
			log_name = 'chrome_debug.log'
			res = -1
			if log_name in t.getnames():
				f = t.extractfile(log_name)
				## Decode the log so the marker search works on text, not bytes
				data = f.read().decode('utf-8', errors='ignore')
				res = data.find('MalNotifications :: Permission Requested')
			if res > -1:
				id = file.replace('container_', '')
				status = api_requests.update_url_api(id, 'has_permission_request', 'true')
				print(status)
				api_requests.update_url_api(id, 'is_visited', 'true')
				print('Permission Request found :: ' + id)
		except Exception as e:
			print(e)
			continue
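
## The permission check relies on tarfile's in-memory member access. A minimal
## standalone version of that extraction step, with a hypothetical archive path
## (the crawler builds it from dir_path + 'container_<id>/chrome_log.tar'):
def _read_chrome_log_example(tar_path='/tmp/container_42/chrome_log.tar'):
    import tarfile
    with tarfile.open(tar_path, 'r') as t:
        if 'chrome_debug.log' in t.getnames():
            data = t.extractfile('chrome_debug.log').read().decode('utf-8', errors='ignore')
            if 'MalNotifications :: Permission Requested' in data:
                print('Permission request found')
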
def get_app_server_key():
	for file in os.listdir(dir_path):
		print('Processing ' + file)
		try:
			log_tar_dir = dir_path + file + '/chrome_log.tar'
			t = tarfile.open(log_tar_dir, 'r')
			log_name = 'chrome_debug.log'
			print(t.getnames())
			id = file.replace('container_', '')
			if log_name in t.getnames():
				f = t.extractfile(log_name)
				## Decode the tarred log and split it into lines for searching
				data = f.read().decode('utf-8', errors='ignore').splitlines()
				sub = 'MalNotifications :: Endpoint'
				## Collect (index, line) pairs for lines containing the endpoint marker
				res = [(i, line) for i, line in enumerate(data) if sub in line]
				print('Result')
				print(res)
				if res:
					i = res[0][0]
					## The app server key and auth secret are logged near the endpoint line
					lines = data[i - 12:i + 2]
					endpoint = data[i].split('::')[2].strip()
					print(endpoint)
					status = api_requests.update_url_api(id, 'endpoint', endpoint)
					print(status)
					for line in lines:
						if 'Application Server Key' in line:
							app_key = line.split('::')[2].strip()
							print(app_key)
							status = api_requests.update_url_api(id, 'app_server_key', app_key)
							print(status)
						if 'Auth Secret' in line:
							auth_key = line.split('::')[2].strip()
							print(auth_key)
							status = api_requests.update_url_api(id, 'auth_secret', auth_key)
							print(status)
		except Exception as e:
			print(e)
			continue
		time.sleep(5)
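
## The split('::')[2] parsing above assumes the value is the third '::'-separated
## field of each matched log line. A hypothetical line (the exact format written
## by the instrumented Chromium may differ) parsed the same way:
def _parse_endpoint_example():
    line = 'MalNotifications :: Endpoint :: https://fcm.googleapis.com/fcm/send/abc123'
    endpoint = line.split('::')[2].strip()
    print(endpoint)  # https://fcm.googleapis.com/fcm/send/abc123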