def test_download(self):
    """Round-trip test: save a root analysis, download it via ace_api.download(),
    and verify the downloaded copy loads with the same details."""
    root = create_root_analysis(uuid=str(uuid.uuid4()))
    root.initialize_storage()
    root.details = {'hello': 'world'}
    root.save()

    temp_dir = tempfile.mkdtemp(dir=saq.TEMP_DIR)
    try:
        ace_api.download(root.uuid, temp_dir)
        # BUG FIX: the original asserted on os.path.join(...) itself, which is a
        # non-empty string and therefore always truthy -- the assertion could
        # never fail. We actually want to check that the downloaded file exists.
        self.assertTrue(os.path.exists(os.path.join(temp_dir, 'data.json')))
        root = RootAnalysis(storage_dir=temp_dir)
        root.load()
        # assertEquals is a deprecated alias of assertEqual
        self.assertEqual(root.details, {'hello': 'world'})
    finally:
        # always remove the temp directory, even if an assertion fails
        shutil.rmtree(temp_dir)
def execute_analysis(self, url):
    """Submit a URL observable to a remote cloudphish server and record the result.

    Visible control flow:
      * returns False when the URL is skipped entirely (root is itself a
        cloudphish alert, per-root request limit reached, unsupported
        scheme, or the URL fails to parse)
      * returns True once a terminal result (success or error) has been
        recorded on the CloudphishAnalysis object
      * NOTE(review): when the remote status is NEW/ANALYZING and
        self.delay_analysis(...) succeeds, execution appears to fall
        through to the content checks below -- presumably a successful
        delay interrupts execution inside the base module (e.g. via an
        exception); confirm against the delay-analysis framework.
    """
    # don't run cloudphish on cloudphish alerts
    if self.root.alert_type == ANALYSIS_TYPE_CLOUDPHISH:
        return False

    # we keep track of what URLs we've given to cloudphish to process
    if self.state is None:
        self.state = {}
        self.state['requests'] = {}

    analysis = url.get_analysis(CloudphishAnalysis)
    if analysis is None:
        try:
            # enforce the per-root cap on cloudphish submissions
            # ("cloudphis" typo below is in the runtime log string -- left as-is)
            if len(self.state['requests']) >= self.cloudphish_request_limit:
                logging.info(f"skipping cloudphis analysis for {url.value} reached cloudphish limit for {self.root}")
                return False

            # do basic URL sanity checks
            parsed_url = urlparse(url.value)
            #if parsed_url.hostname and '.' not in parsed_url.hostname:
                #logging.debug("ignoring invalid FQDN {} in url {}".format(parsed_url.hostname, url.value))
                #return False

            # only analyze http, https and ftp schemes
            if parsed_url.scheme not in ['http', 'https', 'ftp']:
                logging.debug("{} is not a supported scheme for cloudphish".format(parsed_url.scheme))
                return False

            # URL seems ok
            analysis = self.create_analysis(url)

        except Exception as e:
            # urlparse (or analysis creation) raised -- treat as an invalid URL
            logging.debug("possible invalid URL: {}: {}".format(url.value, e))
            return False

    # start the clock XXX isn't this built-in to the delay analysis system?
    if analysis.query_start is None:
        analysis.query_start = int(time.time())
    #else:
        ## or has the clock expired?
        #if int(time.time()) - analysis.query_start > self.query_timeout:
            #logging.warning("cloudphish query for {} has timed out".format(url.value))
            #analysis.result = RESULT_ERROR
            #analysis.result_details = 'QUERY TIMED OUT'
            #return

    # do we have a local cache result for this url?
    # NOTE(review): sha256_url and json_result are set here but not referenced
    # again in this method -- possibly leftovers of the commented-out cache logic.
    sha256_url = hash_url(url.value)
    json_result = None

    # once we decide on a cloudphish server to use we need to keep using the same one
    # for the same url
    if 'cloudphish_server' in self.state:
        cloudphish_server = self.state['cloudphish_server']
    else:
        cloudphish_server = self.get_cloudphish_server()
        self.state['cloudphish_server'] = cloudphish_server

    logging.debug("making cloudphish query against {} for {}".format(cloudphish_server, url.value))

    try:
        context = {
            'c': self.root.uuid, # context
            't': None, # tracking (see below)
        }

        # forward any observables marked with the tracking directive
        # along with the submission
        tracking = []
        for o in self.root.all_observables:
            if o.has_directive(DIRECTIVE_TRACKED):
                tracking.append({
                    'type': o.type,
                    'value': o.value,
                    'time': None if o.time is None else o.time.strftime(event_time_format_json_tz)
                })

        context['t'] = json.dumps(tracking, cls=_JSONEncoder)

        response = ace_api.cloudphish_submit(url.value,
                                             context=context,
                                             remote_host=cloudphish_server,
                                             ssl_verification=saq.CA_CHAIN_PATH,
                                             proxies=saq.PROXIES if self.use_proxy else None,
                                             timeout=self.timeout)

        logging.debug("got result {} for cloudphish query @ {} for {}".format(response, cloudphish_server, url.value))

    except Exception as e:
        # any submission failure becomes a terminal error result for this URL
        logging.warning("cloudphish request failed: {}".format(e))
        analysis.result = RESULT_ERROR
        analysis.result_details = 'REQUEST FAILED ({})'.format(e)
        return True

    # check the results first
    # if the analysis isn't ready yet then we come back later
    if response[KEY_RESULT] == RESULT_OK:
        if response[KEY_STATUS] == STATUS_ANALYZING or response[KEY_STATUS] == STATUS_NEW:
            # keep track of the requests that resulted in work for ACE
            self.state['requests'][url.value] = True

            # otherwise we delay analysis
            logging.info("waiting for cloudphish analysis of {} ({})".format(url.value, response[KEY_STATUS]))
            if not self.delay_analysis(url, analysis, seconds=self.frequency, timeout_seconds=self.query_timeout):
                # analysis timed out
                analysis.result = RESULT_ERROR
                analysis.result_details = 'QUERY TIMED OUT'
                return True

    # sha256 E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855 is the hash for the empty string
    # we ignore this case
    if response[KEY_SHA256_CONTENT] and response[KEY_SHA256_CONTENT].upper() == \
            'E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855':
        logging.debug("ignoring result of 0 length data for {}".format(url.value))
        analysis.result = RESULT_ERROR
        analysis.result_details = 'EMPTY CONTENT'
        return True

    # save the analysis results
    analysis.query_result = response

    # did cloudphish generate an alert?
    if analysis.analysis_result == SCAN_RESULT_ALERT:
        # if cloudphish generated an alert then we'll need to wait for the alert correlation to finish
        # TODO
        temp_dir = None
        try:
            # create a temporary directory to load the alert into
            temp_dir = tempfile.mkdtemp(prefix='cloudphish_', dir=saq.TEMP_DIR)

            # grab the alert it created
            logging.info("downloading alert info for {}".format(url.value))
            ace_api.download(analysis.uuid, temp_dir,
                             remote_host=cloudphish_server,
                             ssl_verification=saq.CA_CHAIN_PATH,
                             proxies=saq.PROXIES if self.use_proxy else None,
                             timeout=self.timeout)

            #response = requests.request('GET', self.get_download_alert_url(),
                                        #params={ 's': analysis.sha256_content },
                                        #timeout=self.timeout,
                                        #proxies=saq.PROXIES if self.use_proxy else {},
                                        #verify=saq.CA_CHAIN_PATH,
                                        #stream=True)

            # load the new alert
            cloudphish_alert = RootAnalysis()
            cloudphish_alert.storage_dir = temp_dir
            try:
                cloudphish_alert.load()
            except Exception as e:
                # NOTE(review): a failed load is only logged -- the merge below
                # still runs against the (unloaded) alert object
                logging.warning("unable to load cloudphish alert for {}: {}".format(url.value, e))
                # XXX there is a reason for this but I forget what it was lol

            # merge this alert into the analysis for this url
            self.root.merge(analysis, cloudphish_alert)

        finally:
            # make sure we clean up these temp directories
            try:
                if temp_dir:
                    shutil.rmtree(temp_dir)
            except Exception as e:
                logging.error("unable to delete directory {}: {}".format(temp_dir, e))
                report_exception()

    # are we forcing the download of the URL?
    elif url.has_directive(DIRECTIVE_FORCE_DOWNLOAD) and analysis.file_name:
        # TODO fix this file naming scheme
        target_file = os.path.join(self.root.storage_dir, analysis.file_name)
        if os.path.exists(target_file):
            logging.warning("target file {} exists".format(target_file))
            return True

        try:
            logging.info("downloading file {} from {}".format(target_file, url.value))
            ace_api.cloudphish_download(url=url.value,
                                        output_path=target_file,
                                        remote_host=cloudphish_server,
                                        ssl_verification=saq.CA_CHAIN_PATH,
                                        proxies=saq.PROXIES if self.use_proxy else None,
                                        timeout=self.timeout)

            #response = requests.request('GET', self.get_download_url(),
                                        #params={ 's': analysis.sha256_content },
                                        #timeout=self.timeout,
                                        #proxies=saq.PROXIES if self.use_proxy else {},
                                        #verify=saq.CA_CHAIN_PATH,
                                        #stream=True)

            #with open(target_file, 'wb') as fp:
                #for chunk in response.iter_content(chunk_size=io.DEFAULT_BUFFER_SIZE):
                    #if chunk:
                        #fp.write(chunk)

            # record the downloaded file as a new observable,
            # path stored relative to the root's storage directory
            analysis.add_observable(F_FILE, os.path.relpath(target_file, start=self.root.storage_dir))

        except Exception as e:
            logging.error("unable to download file {} for url {} from cloudphish: {}".format(target_file, url.value, e))
            report_exception()

    return True