Example #1
0
    def scan(
        self,
        content: bytes,
        payload_meta: Optional[PayloadMeta] = None,
        request_meta: Optional[RequestMeta] = None,
        add_start_dispatch: Optional[List[str]] = None,
        add_start_deep_dispatch: Optional[List[str]] = None,
        ratelimit: Optional[str] = None,
    ) -> StoqResponse:
        """
        Scan raw bytes by wrapping them in a `Payload` and delegating to
        `scan_payload`.

        :param content: Raw bytes to be scanned
        :param payload_meta: Metadata pertaining to originating source
        :param request_meta: Metadata pertaining to the originating request
        :param add_start_dispatch: Force first round of scanning to use specified plugins
        :param add_start_deep_dispatch: Force second round of scanning to use specified plugins
        :param ratelimit: Rate limit calls to scan

        :return: Complete scan results
        :rtype: StoqResponse

        """
        meta = PayloadMeta() if payload_meta is None else payload_meta
        return self.scan_payload(
            Payload(content, meta),
            request_meta,
            add_start_dispatch,
            add_start_deep_dispatch,
        )
Example #2
0
    async def scan(
        self,
        content: bytes,
        payload_meta: Optional[PayloadMeta] = None,
        request_meta: Optional[RequestMeta] = None,
        add_start_dispatch: Optional[List[str]] = None,
        ratelimit: Optional[str] = None,
    ) -> StoqResponse:
        """
        Build a single-payload `Request` from raw bytes and hand it off to
        `scan_request`.

        :param content: Raw bytes to be scanned
        :param payload_meta: Metadata pertaining to originating source
        :param request_meta: Metadata pertaining to the originating request
        :param add_start_dispatch: Force first round of scanning to use specified plugins
        :param ratelimit: Rate limit calls to scan

        """
        self.log.debug(
            f'Content received ({len(content)} bytes): '
            f'PayloadMeta: {helpers.dumps(payload_meta, indent=0)}, '
            f'RequestMeta: {helpers.dumps(request_meta, indent=0)}'
        )
        if not payload_meta:
            payload_meta = PayloadMeta()
        if not request_meta:
            request_meta = RequestMeta()
        request = Request(
            payloads=[Payload(content, payload_meta)], request_meta=request_meta
        )
        return await self.scan_request(request, add_start_dispatch)
Example #3
0
    async def scan(
        self, payload: Payload, request: Request
    ) -> Optional[WorkerResponse]:
        """
        Test worker: optionally raise, always extract one payload, and
        optionally report errors / additional dispatches, driven by class
        attributes.
        """
        if self.RAISE_EXCEPTION:
            raise Exception('Test exception please ignore')

        meta = PayloadMeta(
            should_scan=self.SHOULD_SCAN, dispatch_to=self.EXTRACTED_DISPATCH_TO
        )
        extracted = ExtractedPayload(self.EXTRACTED_PAYLOAD or b'Lorem ipsum', meta)
        response = WorkerResponse({"valuable_insight": "wow"}, extracted=[extracted])

        if self.RETURN_ERRORS:
            response.errors.append(
                Error(
                    plugin_name="simple_worker",
                    error="Test error please ignore",
                    payload_id=payload.results.payload_id,
                )
            )

        if self.ADDITIONAL_DISPATCH_TO:
            response.dispatch_to.extend(self.ADDITIONAL_DISPATCH_TO)

        return response
Example #4
0
    async def ingest(self, queue: Queue) -> None:
        """
        Consume JSON messages from the configured Kafka topic and place them
        on the provided queue until the task is cancelled.
        """
        consumer = AIOKafkaConsumer(
            self.topic,
            group_id=self.group,
            auto_offset_reset='earliest',
            bootstrap_servers=self.servers,
            heartbeat_interval_ms=self.heartbeat_interval_ms,
            session_timeout_ms=self.session_timeout_ms,
            loop=get_event_loop(),
        )
        await consumer.start()
        self.log.info(f'Monitoring {self.topic} topic for messages...')

        async for message in consumer:
            msg = json.loads(message.value)
            if not msg.get('_is_payload'):
                # Plain task message; forward as-is.
                await queue.put(msg)
                continue
            # Payloads published by the kafka-queue archiver plugin carry
            # their metadata alongside base64-encoded content.
            extra = msg['_payload_meta']
            extra['request_meta'] = msg['_request_meta']
            await queue.put(
                Payload(
                    content=b64decode(msg['_content']),
                    payload_meta=PayloadMeta(extra_data=extra),
                )
            )
Example #5
0
    def get(self, task: ArchiverResponse) -> Optional[Payload]:
        """
        Retrieve archived payload from MongoDB

        :param task: Archiver response whose results contain the GridFS
            ``_id`` of the payload to fetch
        :return: Payload with the archived content, or None when nothing
            was found
        """
        self._connect_gridfs()
        result = self.gridfs_db.get(task.results['_id'])
        if result:
            # Bug fix: `payload` was previously undefined (the read() call
            # was commented out), raising NameError on every successful get.
            payload = result.read()
            return Payload(payload, PayloadMeta(extra_data=task.results))
        return None
Example #6
0
 def scan(self, payload: Payload,
          request_meta: RequestMeta) -> Optional[WorkerResponse]:
     """Legacy test worker: optionally raise, extract a payload, report errors."""
     if self.RAISE_EXCEPTION:
         raise Exception('Test exception please ignore')
     if self.DISPATCH_TO:
         extracted = ExtractedPayload(
             b'Lorem ipsum', PayloadMeta(dispatch_to=self.DISPATCH_TO)
         )
     else:
         extracted = ExtractedPayload(b'Lorem ipsum')
     response = WorkerResponse({'valuable_insight': 'wow'}, extracted=[extracted])
     if self.RETURN_ERRORS:
         response.errors += ['Test error please ignore']
     return response
Example #7
0
    async def get(self, task: ArchiverResponse) -> Payload:
        """
        Retrieve archived payload from Azure Blob Storage

        :param task: Archiver response identifying the container and blob
        :return: Payload containing the blob content
        """
        blob_client: BlobClient = BlobClient.from_connection_string(
            conn_str=self.conn_str,
            container_name=task.results['container_name'],
            blob_name=task.results['blob_name'],
        )
        try:
            downloader = await blob_client.download_blob()
            # Bug fix: on the aio StorageStreamDownloader, readall() is a
            # coroutine; it must be awaited (previously a coroutine object
            # was passed to Payload, and only after the client was closed).
            data = await downloader.readall()
        finally:
            # Always release the connection, even if the download fails.
            await blob_client.close()
        meta = PayloadMeta(task.results)
        return Payload(data, meta)
Example #8
0
    def get(self, task: ArchiverResponse) -> Payload:
        """
        Retrieve archived payload from S3

        :param task: Archiver response with the bucket name and object path
        :return: Payload containing the object content
        """
        if not self.client:
            self._get_client()
        bucket = task.results['bucket']
        path = task.results['path']
        meta = PayloadMeta(extra_data={'bucket': bucket, 'path': path})
        body = self.client.get_object(Bucket=bucket, Key=path)['Body']
        return Payload(body.read(), meta)
Example #9
0
 async def test_dont_dest_archive_payload(self):
     """
     A payload whose PayloadMeta sets ``should_archive=False`` must be
     skipped by destination archivers even when the request asks for
     archiving (``archive_payloads=True``).
     """
     s = Stoq(base_dir=utils.get_data_dir(), dest_archivers=['dummy_archiver'])
     dummy_archiver = s.load_plugin('dummy_archiver')
     # Replace archive() with an awaitable autospec so calls can be counted
     # without performing any real archiving.
     dummy_archiver.archive = asynctest.create_autospec(
         dummy_archiver.archive, return_value=None
     )
     response = await s.scan(
         self.generic_content,
         payload_meta=PayloadMeta(should_archive=False),
         add_start_dispatch=['extract_payload'],
         request_meta=RequestMeta(archive_payloads=True),
     )
     # archive() runs exactly once: only for the payload produced by the
     # 'extract_payload' worker, never for the opted-out root payload.
     dummy_archiver.archive.assert_awaited_once()
     # Root payload (results[0]) opted out of archiving; the extracted
     # payload (results[1]) did not.
     self.assertNotIn('dummy_archiver', response.results[0].plugins_run['archivers'])
     self.assertIn('dummy_archiver', response.results[1].plugins_run['archivers'])
 def ingest(self, queue: Queue) -> None:
     """
     Consume JSON messages from the Kafka topic and place them on the queue.

     Messages flagged with ``_is_payload`` are wrapped in a `Payload`;
     everything else is forwarded as-is.
     """
     consumer = KafkaConsumer(
         self.topic,
         group_id=self.group,
         auto_offset_reset='earliest',
         bootstrap_servers=self.servers,
     )
     print(f'Monitoring {self.topic} topic for messages...')
     for message in consumer:
         msg = json.loads(message.value)
         if not msg.get('_is_payload'):
             queue.put(msg)
             continue
         meta = PayloadMeta(extra_data=msg['_request_meta'])
         queue.put(Payload(content=msg['_content'], payload_meta=meta))
Example #11
0
    def get(self, task: ArchiverResponse) -> Payload:
        """
        Retrieve archived payload from gcs

        :param task: Archiver response with archive_bucket/path/project_id
        :return: Payload containing the downloaded object content
        """
        results = task.results
        meta = PayloadMeta(
            extra_data={
                'bucket': results['archive_bucket'],
                'path': results['path'],
                'project_id': results['project_id'],
            })
        client = Client(project=results['project_id'])
        blob = Blob(results['path'], client.get_bucket(results['archive_bucket']))
        buffer = BytesIO()
        blob.download_to_file(buffer)
        buffer.seek(0)
        return Payload(buffer.read(), meta)
Example #12
0
 async def ingest(self, queue: Queue) -> None:
     """
     Poll the configured redis queue and hand messages/payloads to stoQ.

     Each popped key may have companion ``<key>_buf`` / ``<key>_meta``
     entries (written by the redis archiver); when both exist the content
     is wrapped in a `Payload`, otherwise the message itself is treated as
     JSON task data.
     """
     # Local import so the fix is self-contained within this method.
     import asyncio

     self.log.info(f'Monitoring redis queue {self.redis_queue}')
     while True:
         msg = self.conn.blpop(self.redis_queue, timeout=0)
         if not msg:
             # Bug fix: time.sleep() here blocked the whole event loop;
             # yield to other coroutines while idling instead.
             await asyncio.sleep(0.1)
             continue
         data = msg[1].decode()
         payload = self.conn.get(f'{data}_buf')
         meta = self.conn.get(f'{data}_meta')
         if meta and payload:
             meta = json.loads(meta.decode())
             await queue.put(
                 Payload(payload, payload_meta=PayloadMeta(extra_data=meta))
             )
             # Bug fix: these keys were written as f'{data}_buf'/f'{data}_meta'
             # but were previously deleted using the decoded meta dict in the
             # key name, so they were never actually cleaned up.
             self.conn.delete(f'{data}_buf')
             self.conn.delete(f'{data}_meta')
         else:
             await queue.put(json.loads(data))
Example #13
0
    def get(self, task: ArchiverResponse) -> Payload:
        """
        Retrieve archived payload from gcs

        :param task: Archiver response with bucketId/objectId/projectId
        :return: Payload containing the (optionally decrypted) object content
        :raises StoqPluginException: if the download still fails after
            ``self.max_retries`` retries
        """
        meta = PayloadMeta(
            extra_data={
                'bucketId': task.results['bucketId'],
                'objectId': task.results['objectId'],
                'projectId': task.results['projectId'],
            }
        )
        client = Client(project=task.results['projectId'])
        # Retry transient GCS/SSL failures with a small random backoff.
        # Bug fix: the previous `while count < self.max_retries` loop could
        # exhaust its retries without ever raising (the inner
        # `count >= self.max_retries` check was unreachable), then crash
        # with a NameError on an unbound `content` below.
        for attempt in range(self.max_retries + 1):
            try:
                bucket = client.get_bucket(task.results['bucketId'])
                blob = Blob(task.results['objectId'], bucket)
                # Fresh buffer per attempt so a failed partial download
                # never leaks into the next try.
                content = BytesIO()
                blob.download_to_file(content)
                break
            except (
                InvalidResponse,
                GoogleAPICallError,
                InternalServerError,
                SSLError,
            ) as e:
                if attempt >= self.max_retries:
                    raise StoqPluginException(
                        f'Failed to download {task.results["bucketId"]}/{task.results["objectId"]} from GCS: {str(e)}'
                    ) from e
                sleep(randrange(0, 4))
        content.seek(0)
        data = content.read()
        if self.use_encryption:
            data = self._decrypt(data)
        return Payload(data, meta)
Example #14
0
 def test_payloadmeta_to_str(self):
     """str(PayloadMeta) must produce a JSON document decoding to a dict."""
     meta = PayloadMeta()
     serialized = str(meta)
     self.assertIsInstance(serialized, str)
     parsed = json.loads(serialized)
     self.assertIsInstance(parsed, dict)
Example #15
0
 def get(self, task: ArchiverResponse) -> Optional[Payload]:
     """Dummy archiver: return the canned payload, or raise when configured."""
     if self.RAISE_EXCEPTION:
         raise Exception('Test exception please ignore')
     meta = PayloadMeta(extra_data=task.results)
     return Payload(self.PAYLOAD, meta)
Example #16
0
    async def test_reconstruct_all_subresponses(self):
        """
        reconstruct_all_subresponses() should yield one StoqResponse per
        payload, each rooted at that payload and containing only its
        descendants, all sharing the original scan_id.
        """
        # Construct a fake stoq_response as if it were generated from a file
        # A.zip that contains two files, B.txt and C.zip, where C.zip contains D.txt
        results = [
            Payload(content=b'', payload_id='A.zip', payload_meta=PayloadMeta()),
            Payload(
                content=b'',
                payload_id='B.txt',
                payload_meta=PayloadMeta(),
                extracted_from='A.zip',
                extracted_by='fake',
            ),
            Payload(
                content=b'',
                payload_id='C.zip',
                payload_meta=PayloadMeta(),
                extracted_from='A.zip',
                extracted_by='fake',
            ),
            Payload(
                content=b'',
                payload_id='D.txt',
                payload_meta=PayloadMeta(),
                extracted_from='C.zip',
                extracted_by='fake',
            ),
        ]
        request = Request(request_meta=RequestMeta(extra_data={'check': 'me'}))
        payload_count = 1
        # Tag each payload with a distinguishable fake worker result so the
        # per-root responses below can be told apart.
        for result in results:
            result.results.workers['fake'] = f'result-{payload_count}'
            result.results.plugins_run['workers'].append('fake')
            request.payloads.append(result)
            payload_count += 1

        initial_response = StoqResponse(request)
        s = Stoq(base_dir=utils.get_data_dir(), decorators=['simple_decorator'])
        all_subresponses = [
            r async for r in s.reconstruct_all_subresponses(initial_response)
        ]
        # We expect there to be four "artificial" responses generated, one for
        # each payload as the root.
        self.assertEqual(len(all_subresponses), 4)
        # We expect the first response to have all 4 payloads, the second response
        # to have just the second payload, the third response to have the third
        # and fourth payload, and the fourth response to have just the fourth payload
        self.assertEqual(
            [len(stoq_response.results) for stoq_response in all_subresponses],
            [4, 1, 2, 1],
        )
        # Each subresponse should be rooted at its own payload's fake result.
        self.assertEqual(
            [
                stoq_response.results[0].workers['fake']
                for stoq_response in all_subresponses
            ],
            ['result-1', 'result-2', 'result-3', 'result-4'],
        )
        # Decorators must be re-applied to every reconstructed response.
        self.assertTrue(
            all(
                'simple_decorator' in stoq_response.decorators
                for stoq_response in all_subresponses
            )
        )
        # Assert that they all have the same scan ID
        self.assertEqual(
            len({stoq_response.scan_id for stoq_response in all_subresponses}), 1
        )
Example #17
0
    async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
        """
        Collect PE-file metadata (imports, sections, resources, timestamps,
        file-type flags, ...) and carve embedded certificates and resources
        out as extracted payloads.
        """
        pe = self._get_pe_file(payload.content)

        imports = self._get_imports(pe)
        exports = self._get_exports(pe)
        version_info = self._get_version_info(pe)
        certs = self._get_certs(pe)
        sections = self._get_section_info(pe)
        resources = self._get_resource_info(pe)
        rich_header = self._get_rich_header_hash(pe)
        imphash = self._get_imphash(pe)
        compile_time = self._get_compile_time(pe)
        tls_callbacks = self._get_tls_callbacks(pe)
        image_base = self._get_image_base(pe)
        entry_point = self._get_entry_point(pe)
        debug_info = self._get_debug_info(pe)
        is_packed = self._is_packed(pe)
        is_exe = self._is_exe(pe)
        is_dll = self._is_dll(pe)
        is_driver = self._is_driver(pe)
        is_suspicious = self._is_suspicious(pe)
        is_valid = self._is_valid(pe)

        results: Dict = {}
        extracted: List[ExtractedPayload] = []

        # Truthy-only fields, recorded in the same order as before.
        for key, value in (
            ('imports', imports),
            ('exports', exports),
            ('version_info', version_info),
        ):
            if value:
                results[key] = value

        if certs:
            results['certificates'] = []
            for cert_data, content in certs:
                results['certificates'].append(cert_data)
                if not content:
                    continue
                # Name the carved certificate after its SHA256 digest.
                cert_data['filename'] = bytes(cert_data['sha256'], 'ascii')
                extracted.append(
                    ExtractedPayload(
                        content=content,
                        payload_meta=PayloadMeta(extra_data=cert_data),
                    )
                )

        if sections:
            results['sections'] = sections

        if resources:
            results['resources'] = []
            for rsrc_data, content in resources:
                results['resources'].append(rsrc_data)
                if not content:
                    continue
                rsrc_data['filename'] = rsrc_data['name']
                extracted.append(
                    ExtractedPayload(
                        content=content,
                        payload_meta=PayloadMeta(extra_data=rsrc_data),
                    )
                )

        for key, value in (
            ('rich_header', rich_header),
            ('imphash', imphash),
            ('tls_callbacks', tls_callbacks),
            ('debug_info', debug_info),
            ('is_packed', is_packed),
            ('is_exe', is_exe),
            ('is_dll', is_dll),
            ('is_driver', is_driver),
            ('is_suspicious', is_suspicious),
            ('is_valid', is_valid),
        ):
            if value:
                results[key] = value

        # Always-present fields.
        results['compile_time_epoch'] = compile_time[0]
        results['compile_time'] = compile_time[1]
        results['image_base'] = image_base
        results['entrypoint'] = entry_point

        pe.close()
        return WorkerResponse(results=results, extracted=extracted)