async def scan(self, guid, artifact_type, content, metadata, chain):
    """Scan an artifact with Yara.

    Args:
        guid (str): GUID of the bounty under analysis, used to track artifacts in the same bounty
        artifact_type (ArtifactType): Artifact type for the bounty being scanned
        content (bytes): Content of the artifact to be scanned
        metadata (dict): Dict of metadata for the artifact (not read by this scanner)
        chain (str): Chain we are operating on

    Returns:
        ScanResult: Result of this scan; the bit is always set and the verdict
            is True when at least one rule matched
    """
    # Function-scope import keeps the block self-contained. platform is used
    # instead of os.uname() because os.uname() is missing on non-POSIX hosts.
    import platform

    matches = self.rules.match(data=content)
    scanner_metadata = Verdict().set_scanner(operating_system=platform.system(),
                                             architecture=platform.machine(),
                                             vendor_version=yara.__version__)
    if matches:
        # author responsible for distilling multiple metadata values into a value for ScanResult
        scanner_metadata.set_malware_family(matches[0].rule)
        return ScanResult(bit=True, verdict=True, metadata=scanner_metadata.json())

    scanner_metadata.set_malware_family('')
    return ScanResult(bit=True, verdict=False, metadata=scanner_metadata.json())
async def scan(self, guid, artifact_type, content, metadata, chain):
    """Scan an artifact with ClamAV.

    Args:
        guid (str): GUID of the bounty under analysis, used to track artifacts in the same bounty
        artifact_type (ArtifactType): Artifact type for the bounty being scanned
        content (bytes): Content of the artifact to be scanned
        metadata (dict): Dict of metadata for the artifact (not read by this scanner)
        chain (str): Chain we are operating on

    Returns:
        ScanResult: Result of this scan; the bit is always set and the verdict
            is True when clamd reports ``FOUND`` for the stream
    """
    response = await self.clamd.instream(BytesIO(content))
    stream = response.get('stream', [])
    version = await self.clamd.version()

    scanner_metadata = Verdict().set_scanner(operating_system=platform.system(),
                                             architecture=platform.machine(),
                                             vendor_version=version.strip('\n'))

    # A detection is reported as a ('FOUND', <signature name>) pair.
    detected = len(stream) >= 2 and stream[0] == 'FOUND'
    if not detected:
        scanner_metadata.set_malware_family('')
        return ScanResult(bit=True, verdict=False, metadata=scanner_metadata.json())

    scanner_metadata.set_malware_family(stream[1].strip('\n'))
    return ScanResult(bit=True, verdict=True, confidence=1.0, metadata=scanner_metadata.json())
async def fetch_and_scan(artifact_metadata, index):
    """Scan the artifact at ``index`` unless the bounty filter rejects it.

    Args:
        artifact_metadata (dict): Metadata blob for this artifact
        index (int): Position of the artifact under the enclosing ``uri``

    Returns:
        ScanResult: Result of the scan, or a blank ScanResult when the
            artifact is filtered out or could not be downloaded
    """
    # Consult the filter first so rejected artifacts are never downloaded.
    if not self.bounty_filter.is_allowed(artifact_metadata):
        return ScanResult()

    content = await self.client.get_artifact(uri, index)
    if content is None:
        return ScanResult()

    return await self.scan(guid, artifact_type, content, artifact_metadata, chain)
async def fetch_and_scan(index):
    """Download, decode and scan the artifact at ``index``.

    Args:
        index (int): Position of the artifact under the enclosing ``uri``

    Returns:
        ScanResult: Result of the scan, or a blank ScanResult when the
            artifact is unavailable or a DecodeError is raised
    """
    artifact = await self.client.get_artifact(uri, index)
    if artifact is None:
        return ScanResult()

    try:
        decoded = artifact_type.decode_content(artifact)
        # Ignoring metadata for now
        return await self.scan(guid, artifact_type, decoded, None, chain)
    except DecodeError:
        return ScanResult()
async def fetch_and_scan(index):
    """Download and scan the artifact at ``index``.

    Args:
        index (int): Position of the artifact under the enclosing ``uri``

    Returns:
        ScanResult: Result of the scan, or a blank ScanResult when the
            artifact could not be downloaded
    """
    artifact = await self.client.get_artifact(uri, index)
    if artifact is None:
        return ScanResult()

    # Ignoring metadata for now
    return await self.scan(guid, artifact_type, artifact, None, chain)
def __finish(self):
    """
    Resolve the future with one ScanResult per job, in job order.

    Jobs that have no stored result get a blank ScanResult.
    """
    ordered_results = []
    for position, _ in enumerate(self.jobs):
        ordered_results.append(self.results.get(position, ScanResult()))

    self.future.set_result(ordered_results)
async def fetch_and_scan(artifact_metadata, index):
    """Scan the artifact at ``index`` and adjust the confidence of the result.

    Args:
        artifact_metadata (dict): Metadata blob for this artifact
        index (int): Position of the artifact under the enclosing ``uri``

    Returns:
        ScanResult: Result of the scan with its confidence passed through the
            confidence modifier when the bit is set, or a blank ScanResult
            when the artifact is filtered out, unavailable or undecodable
    """
    # Consult the filter first so rejected artifacts are never downloaded.
    if not self.bounty_filter.is_allowed(artifact_metadata):
        return ScanResult()

    content = await self.client.get_artifact(uri, index)
    if content is None:
        return ScanResult()

    try:
        result = await self.scan(guid, artifact_type,
                                 artifact_type.decode_content(content),
                                 artifact_metadata, chain)
    except DecodeError:
        result = ScanResult()

    # Only results that assert carry a confidence worth adjusting.
    if result.bit:
        result.confidence = self.confidence_modifier.modify(artifact_metadata, result.confidence)

    return result
def scan_sync(self, guid, artifact_type, content, metadata, chain):
    """Scan an artifact for the EICAR test string.

    Args:
        guid (str): GUID of the bounty under analysis, used to track artifacts in the same bounty
        artifact_type (ArtifactType): Artifact type for the bounty being scanned
        content (bytes): Content of the artifact to be scanned; a ``str`` is encoded first
        metadata (dict): Dict of metadata for the artifact (not read by this scanner)
        chain (str): Chain we are operating on

    Returns:
        ScanResult: Result of this scan; the bit is always set and the verdict
            is True when EICAR is found in the content
    """
    scanner_metadata = Verdict().set_scanner(operating_system=self.system,
                                             architecture=self.machine)
    payload = content.encode() if isinstance(content, str) else content

    is_eicar = EICAR in payload
    scanner_metadata.set_malware_family('Eicar Test File' if is_eicar else '')
    return ScanResult(bit=True, verdict=is_eicar, metadata=scanner_metadata.json())
async def scan_async(self, guid, artifact_type, content, metadata, chain):
    """Return a blank scan result without inspecting the artifact.

    Args:
        guid (str): GUID of the bounty under analysis, used to track artifacts in the same bounty
        artifact_type (ArtifactType): Artifact type for the bounty being scanned
        content (bytes): Content of the artifact to be scanned
        metadata (dict): Dict of metadata for the artifact
        chain (str): Chain we are operating on

    Returns:
        ScanResult: A default-constructed ScanResult
    """
    blank_result = ScanResult()
    return blank_result
def __store_job_response(self, response: JobResponse, confidence_modifier: Optional[ConfidenceModifier]):
    """
    Turn a JobResponse into a ScanResult and store it under the response's index.

    :param response: JobResponse to convert
    :param confidence_modifier: optional ConfidenceModifier; when given, the confidence is
        recomputed from the matching job's metadata and the response's confidence
    :return:
    """
    if confidence_modifier:
        job_metadata = self.jobs[response.index].metadata
        confidence = confidence_modifier.modify(job_metadata, response.confidence)
    else:
        confidence = response.confidence

    self.results[response.index] = ScanResult(bit=response.bit,
                                              verdict=response.verdict,
                                              confidence=confidence,
                                              metadata=response.metadata)
async def scan(self, guid, artifact_type, content, metadata, chain):
    """Scan an artifact with every backend and merge the results.

    Args:
        guid (str): GUID of the bounty under analysis, used to track artifacts in the same bounty
        artifact_type (ArtifactType): Artifact type for the bounty being scanned
        content (bytes): Content of the artifact to be scanned
        metadata (dict): Dict of metadata for the artifact, forwarded to each backend
        chain (str): Chain we are operating on

    Returns:
        ScanResult: Bit and verdict are the logical OR over all backends, and
            confidence is the mean confidence of the asserting backends
            (0.0 when none assert). Metadata is the first backend's metadata,
            replaced by a Verdict carrying the first valid malware family when
            any backend metadata validates. A blank ScanResult is returned
            when there are no backends.
    """
    # Forward the artifact metadata as well: the scanner interface is
    # scan(guid, artifact_type, content, metadata, chain).
    results = await asyncio.gather(*[
        backend.scan(guid, artifact_type, content, metadata, chain)
        for backend in self.backends
    ])
    if not results:
        # No backends means nothing was asserted and there is nothing to merge.
        return ScanResult()

    # Unpack the results
    bits = [r.bit for r in results]
    verdicts = [r.verdict for r in results]
    metadatas = [r.metadata for r in results]

    # Only asserting backends contribute; avoid dividing by zero when none do.
    asserted_confidences = [r.confidence for r in results if r.bit]
    if asserted_confidences:
        avg_confidence = sum(asserted_confidences) / len(asserted_confidences)
    else:
        avg_confidence = 0.0

    # author responsible for distilling multiple metadata values into a value for ScanResult
    merged_metadata = metadatas[0]
    try:
        valid_metadatas = []
        for raw in metadatas:
            if not raw:
                continue
            parsed = json.loads(raw)
            if Verdict.validate(parsed):
                valid_metadatas.append(parsed)

        if valid_metadatas:
            family = valid_metadatas[0].get('malware_family', '')
            merged_metadata = Verdict().set_malware_family(family).json()
    except json.JSONDecodeError:
        # Fall back to the first backend's raw metadata.
        logger.exception('Error decoding sub metadata')

    return ScanResult(bit=any(bits),
                      verdict=any(verdicts),
                      confidence=avg_confidence,
                      metadata=merged_metadata)
async def scan(self, guid, artifact_type, content, metadata, chain):
    """Look up the SHA-256 of an artifact in the local hash database.

    Args:
        guid (str): GUID of the bounty under analysis, used to track artifacts in the same bounty
        artifact_type (ArtifactType): Artifact type for the bounty being scanned
        content (bytes): Content of the artifact to be scanned
        metadata (dict): Metadata blob for this artifact (not read by this scanner)
        chain (str): Chain sample is being sent from

    Returns:
        ScanResult: The bit is set when the hash has a row in ``files``; the
            verdict is True when that row's second column equals 1 or when
            EICAR is present in the content
    """
    digest = hashlib.sha256(content).hexdigest()

    cursor = self.conn.cursor()
    cursor.execute('SELECT * FROM files WHERE name=?', (digest, ))
    record = cursor.fetchone()

    known = record is not None
    malicious = (known and record[1] == 1) or EICAR in content
    return ScanResult(bit=known, verdict=malicious)
async def wait_for_result(result_key):
    """Wait for one worker result on ``result_key`` and convert it to a ScanResult.

    Args:
        result_key (str): Redis list key the workers push their JSON results to

    Returns:
        tuple or None: ``(index, ScanResult)`` built from the worker's JSON
            payload, or None on timeout, Redis failure or a malformed response
    """
    remaining = KEY_TIMEOUT
    try:
        with await self.redis as redis:
            while True:
                # BLPOP with timeout=0 blocks forever, which made the countdown
                # below unreachable. Block for one second per attempt instead,
                # so the wait is bounded to roughly KEY_TIMEOUT seconds.
                result = await redis.blpop(result_key, timeout=1)
                if result:
                    break

                if remaining == 0:
                    logger.critical('Timeout waiting for result in bounty %s', guid)
                    return None

                remaining -= 1

            # blpop yields (key, value); the payload is the second element.
            j = json.loads(result[1].decode('utf-8'))

            # increase perf counter for autoscaling
            q_counter = f'{self.queue}_scan_result_counter'
            await redis.incr(q_counter)

            return j['index'], ScanResult(bit=j['bit'],
                                          verdict=j['verdict'],
                                          confidence=j['confidence'],
                                          metadata=j['metadata'])
    except aioredis.errors.ReplyError:
        logger.exception('Redis out of memory')
    except OSError:
        logger.exception('Redis connection down')
    except (AttributeError, ValueError, KeyError):
        logger.error('Received invalid response from worker')

    return None
def rate_limit_respond(self, job: JobRequest):
    """Schedule a blank response for ``job`` on the current event loop.

    The response carries the job's index together with the fields of a
    default-constructed ScanResult.

    Args:
        job (JobRequest): Job to answer
    """
    event_loop = asyncio.get_event_loop()
    empty = ScanResult()
    blank_response = JobResponse(job.index,
                                 empty.bit,
                                 empty.verdict,
                                 empty.confidence,
                                 empty.metadata)
    event_loop.create_task(self.respond(job, blank_response))
async def scan(self, guid, artifact_type, uri, expiration_blocks, metadata, chain):
    """Creates a set of jobs to scan all the artifacts at the given URI that are passed via Redis to workers

    Args:
        guid (str): GUID of the associated bounty
        artifact_type (ArtifactType): Artifact type for the bounty being scanned
        uri (str): Base artifact URI
        expiration_blocks (int): Blocks until vote round ends
        metadata (list[dict]): List of metadata json blobs for artifacts
        chain (str): Chain we are operating on

    Returns:
        list(ScanResult): One ScanResult per artifact, blank for artifacts
            that were filtered out or whose result never arrived. An empty
            list is returned when no job was queued or Redis failed.
    """
    # Ensure we don't wait past the vote round duration for one long artifact
    timeout = expiration_blocks - self.time_to_post
    logger.info(f' timeout set to {timeout}')

    async def wait_for_result(result_key):
        """Wait for one worker result; return ``(index, ScanResult)`` or None on failure."""
        remaining = KEY_TIMEOUT
        try:
            with await self.redis as redis:
                while True:
                    # BLPOP with timeout=0 blocks forever, which made the
                    # countdown below unreachable. Block for one second per
                    # attempt instead, so the wait is bounded to roughly
                    # KEY_TIMEOUT seconds.
                    result = await redis.blpop(result_key, timeout=1)
                    if result:
                        break

                    if remaining == 0:
                        logger.critical('Timeout waiting for result in bounty %s', guid)
                        return None

                    remaining -= 1

                # blpop yields (key, value); the payload is the second element.
                j = json.loads(result[1].decode('utf-8'))

                # increase perf counter for autoscaling
                q_counter = f'{self.queue}_scan_result_counter'
                await redis.incr(q_counter)

                return j['index'], ScanResult(bit=j['bit'],
                                              verdict=j['verdict'],
                                              confidence=j['confidence'],
                                              metadata=j['metadata'])
        except aioredis.errors.ReplyError:
            logger.exception('Redis out of memory')
        except OSError:
            logger.exception('Redis connection down')
        except (AttributeError, ValueError, KeyError):
            logger.error('Received invalid response from worker')

        return None

    num_artifacts = len(await self.client.list_artifacts(uri))
    # Fill out metadata to match same number of artifacts
    metadata = BountyFilter.pad_metadata(metadata, num_artifacts)

    # One JSON job per artifact that passes the (optional) bounty filter.
    jobs = []
    for i in range(num_artifacts):
        if self.bounty_filter is None or self.bounty_filter.is_allowed(metadata[i]):
            jobs.append(
                json.dumps({
                    'ts': time.time() // 1,
                    'guid': guid,
                    'artifact_type': artifact_type.value,
                    'uri': uri,
                    'index': i,
                    'chain': chain,
                    'duration': timeout,
                    'polyswarmd_uri': self.client.polyswarmd_uri,
                    'metadata': metadata[i]
                }))

    if jobs:
        try:
            await self.redis.rpush(self.queue, *jobs)

            # All jobs of this bounty share one result list; wait once per job.
            key = '{}_{}_{}_results'.format(self.queue, guid, chain)
            results = await asyncio.gather(*[wait_for_result(key) for _ in jobs])
            results = {r[0]: r[1] for r in results if r is not None}

            # Age off old result keys
            await self.redis.expire(key, KEY_TIMEOUT)

            return [results.get(i, ScanResult()) for i in range(num_artifacts)]
        except OSError:
            logger.exception('Redis connection down')
        except aioredis.errors.ReplyError:
            logger.exception('Redis out of memory')

    return []
async def scan(self, guid, artifact_type, content, metadata, chain):
    """Assert on every artifact without inspecting it.

    Args:
        guid (str): GUID of the bounty under analysis
        artifact_type (ArtifactType): Artifact type for the bounty being scanned
        content (bytes): Content of the artifact (not read)
        metadata (dict): Dict of metadata for the artifact (not read)
        chain (str): Chain we are operating on

    Returns:
        ScanResult: A ScanResult constructed with ``bit=True`` and all other
            fields left at their defaults
    """
    asserted = ScanResult(bit=True)
    return asserted
async def scan_async(self, guid, artifact_type, content, metadata, chain):
    """Wait ``self.sleep`` seconds, then return a ScanResult with ``verdict=False``.

    Args:
        guid (str): GUID of the bounty under analysis
        artifact_type (ArtifactType): Artifact type for the bounty being scanned
        content (bytes): Content of the artifact (not read)
        metadata (dict): Dict of metadata for the artifact (not read)
        chain (str): Chain we are operating on

    Returns:
        ScanResult: A ScanResult constructed with ``verdict=False``
    """
    # Function-scope import keeps the block self-contained.
    import asyncio

    # time.sleep() inside a coroutine blocks the whole event loop, stalling
    # every other task and any timeout; yield to the loop while waiting.
    await asyncio.sleep(self.sleep)
    return ScanResult(verdict=False)
def scan_error_result(e: 'BaseScanError') -> 'ScanResult':
    """Build the ScanResult reported for a failed scan.

    Args:
        e (BaseScanError): Error raised by the scan; its ``event_name`` is
            recorded under the ``scan_error`` extra of the metadata

    Returns:
        ScanResult: Non-asserting result (``bit=False``, ``verdict=False``)
            whose metadata has an empty malware family and the error name
    """
    error_metadata = Verdict().set_malware_family('')
    error_metadata = error_metadata.add_extra('scan_error', e.event_name)
    return ScanResult(bit=False, verdict=False, metadata=error_metadata)
def scan_result(request, scan_metadata):
    """Produce the value described by ``request.param``.

    Args:
        request: Object whose ``param`` is either an Exception instance or a
            ``(bit, verdict)`` pair
        scan_metadata: Metadata attached to the ScanResult that is built

    Returns:
        The exception itself when ``request.param`` is an Exception, otherwise
        a ScanResult built from the pair and ``scan_metadata``
    """
    param = request.param
    if not isinstance(param, Exception):
        bit, verdict = param
        return ScanResult(bit=bit, verdict=verdict, metadata=scan_metadata)

    # Exceptions are handed back untouched.
    return param