Code example #1
0
File: core.py  Project: ravifatty/stoq
 async def _consume(
     self,
     payload_queue: asyncio.Queue,
     request_meta: Optional[RequestMeta] = None,
     add_start_dispatch: Optional[List[str]] = None,
 ) -> None:
     """
     Consume items from the provider queue forever (until the task is
     cancelled), scanning each one.

     :param payload_queue: Queue fed by provider plugins; items are either
                           a `Payload` or a dict mapping a source archiver
                           plugin name to its task metadata
     :param request_meta: Metadata pertaining to the originating request
     :param add_start_dispatch: Force first round of scanning to use the
                                specified plugins
     """
     while True:
         # NOTE: `await Queue.get()` blocks until an item is available and
         # never raises `asyncio.QueueEmpty` (only `get_nowait()` does),
         # so the previous `except asyncio.QueueEmpty: pass` was dead code
         # and has been removed.
         task = await payload_queue.get()
         # Determine whether the provider has returned a `Payload`, or a task.
         # If it is a task, load the defined archiver plugin to load the
         # `Payload`, otherwise, simply continue on with the scanning.
         if isinstance(task, Payload):
             request = Request([task], request_meta)
             await self.scan_request(request, add_start_dispatch)
         else:
             for source_archiver, task_meta in task.items():
                 self.log.debug(
                     f'Provider task received: source_archiver: {source_archiver}, '
                     f'task_meta: {task_meta}')
                 try:
                     ar = ArchiverResponse(task_meta)
                     payload = await self._loaded_source_archiver_plugins[
                         source_archiver].get(ar)
                     if payload:
                         request = Request([payload], request_meta)
                         await self.scan_request(
                             request, add_start_dispatch)
                 except Exception as e:
                     # A failing archiver only skips this task; other
                     # queued tasks must still be processed.
                     # (`warn` is a deprecated alias of `warning`.)
                     self.log.warning(
                         f'"{task_meta}" failed with archiver "{source_archiver}": {str(e)}'
                     )
         payload_queue.task_done()
Code example #2
0
    async def archive(self, payload: Payload,
                      request: Request) -> ArchiverResponse:
        """
        Archive payload to Azure Blob Storage

        :param payload: Payload to archive
        :param request: Originating request (unused here)
        :return: Container and blob name of the archived payload
        """
        if self.use_sha:
            # Content-addressed blob name: the sha1 hex digest, with its
            # first five characters expanded into single-character
            # directories (e.g. 'a/b/c/d/e/<sha1>') to spread blobs
            # across prefixes.
            filename = hashlib.sha1(payload.content).hexdigest()
            filename = f'{"/".join(list(filename[:5]))}/{filename}'
        elif self.use_datetime:
            datetime_path = datetime.now().strftime('%Y/%m/%d')
            filename = f'{datetime_path}/{payload.payload_id}'
        else:
            filename = payload.results.payload_id

        blob_client: BlobClient = BlobClient.from_connection_string(
            conn_str=self.conn_str,
            container_name=self.archive_container,
            blob_name=filename,
        )
        try:
            await blob_client.upload_blob(payload.content)
        except ResourceExistsError:
            # Blob already archived (names can be content-addressed);
            # treat as success.
            pass
        finally:
            # Always release the client, even if the upload raised an
            # unexpected error (previously it leaked on failure).
            await blob_client.close()
        return ArchiverResponse({
            'container_name': self.archive_container,
            'blob_name': filename
        })
Code example #3
0
 async def archive(
     self, payload: Payload, request: Request
 ) -> Optional[ArchiverResponse]:
     """
     Archive a payload to Redis and enqueue its id for later retrieval.

     :param payload: Payload to archive
     :param request: Originating request (unused here)
     :return: Response carrying the queued message id
     """
     pid = payload.payload_id
     # Store metadata and raw content under id-derived keys, then push the
     # id onto the work queue so a consumer can locate both entries.
     self.conn.set(f'{pid}_meta', str(payload.payload_meta))
     self.conn.set(f'{pid}_buf', payload.content)
     self.conn.rpush(self.redis_queue, pid)
     return ArchiverResponse({'msg_id': pid})
Code example #4
0
 async def archive(self, payload: Payload,
                   request: Request) -> Optional[ArchiverResponse]:
     """
     Publish a payload to a Google Cloud Pub/Sub topic.

     :param payload: Payload to publish
     :param request: Originating request (unused here)
     :return: Response carrying the published message id
     """
     topic = f'projects/{self.project_id}/topics/{self.topic}'
     self._publish_connect(topic)
     # publish() returns a future; result() blocks until the server
     # acknowledges and yields the message id.
     publish_future = self.publish_client.publish(
         topic, payload.content, meta=payload.payload_meta)
     return ArchiverResponse({'msg_id': publish_future.result()})
Code example #5
0
File: simple_archiver.py  Project: Sts0mrg0/stoq-1
 def archive(self, payload: Payload,
             request_meta: RequestMeta) -> Optional[ArchiverResponse]:
     """
     Test archiver: raise or attach errors when the corresponding class
     flags are set, otherwise return a canned successful response.
     """
     if self.RAISE_EXCEPTION:
         raise Exception('Test exception please ignore')
     response = ArchiverResponse({'file_save_id': 12345})
     if self.RETURN_ERRORS:
         response.errors += ['Test error please ignore']
     return response
Code example #6
0
File: test_core.py  Project: ytreister/stoq
 async def test_source_archive(self):
     """A source archiver's get() should load the payload described by a task."""
     stoq = Stoq(base_dir=utils.get_data_dir(), source_archivers=['simple_archiver'])
     archiver = stoq.load_plugin('simple_archiver')
     archiver.PAYLOAD = b'This is a payload'
     archiver_task = ArchiverResponse(results={'path': '/tmp/123'})
     payload = await archiver.get(archiver_task)
     self.assertEqual(payload.content, archiver.PAYLOAD)
     self.assertEqual('/tmp/123', payload.results.payload_meta.extra_data['path'])
Code example #7
0
File: s3.py  Project: sooshie/stoq-plugins-public
    def archive(self, payload: Payload, request_meta: RequestMeta) -> ArchiverResponse:
        """
        Archive payload to S3

        :param payload: Payload to write to the archive bucket
        :param request_meta: Originating request metadata (unused here)
        :return: Bucket and object path of the archived payload
        """
        if self.use_sha:
            # Content-addressed key: the sha1 hex digest, sharded into five
            # single-character directories (e.g. 'a/b/c/d/e/<sha1>') so
            # objects spread across key prefixes.
            filename = hashlib.sha1(payload.content).hexdigest()
            filename = f'{"/".join(list(filename[:5]))}/{filename}'
        else:
            filename = payload.payload_id
        self._upload(payload.content, filename, self.archive_bucket)
        return ArchiverResponse({'bucket': self.archive_bucket, 'path': filename})
Code example #8
0
 def archive(
     self, payload: Payload, request_meta: RequestMeta
 ) -> Optional[ArchiverResponse]:
     """
     Serialize a payload and publish it to the configured topic.

     :param payload: Payload to publish
     :param request_meta: Originating request metadata, sent alongside the
                          content so consumers can reconstruct the request
     :return: Empty response (the broker assigns no retrievable id here)
     """
     self._connect()
     message = {
         '_is_payload': True,
         '_content': payload.content,
         '_request_meta': request_meta,
     }
     encoded = helpers.dumps(message).encode()
     self.producer.send(self.topic, encoded)
     # Block until buffered messages are handed to the broker before
     # reporting success.
     self.producer.flush()
     return ArchiverResponse()
Code example #9
0
    def archive(self, payload: Payload, request_meta: RequestMeta) -> ArchiverResponse:
        """
        Archive a payload to MongoDB

        :param payload: Payload to store in GridFS
        :param request_meta: Originating request metadata (unused here)
        :return: Response carrying the stored file's metadata
        """
        self._connect_gridfs()
        # Use the content sha1 as the GridFS _id so identical payloads are
        # stored only once; note this also adds '_id' to the payload's
        # extra_data in place.
        meta = payload.payload_meta.extra_data
        meta['_id'] = helpers.get_sha1(payload.content)
        try:
            with self.gridfs_db.new_file(**meta) as gridfs_file:
                gridfs_file.write(payload.content)
        except (DuplicateKeyError, FileExists):
            # Payload already archived under this sha1; treat as success.
            pass
        return ArchiverResponse(meta)
Code example #10
0
    def archive(
        self, payload: Payload, request_meta: RequestMeta
    ) -> Optional[ArchiverResponse]:
        """
        Archive Payload object to Kafka queue

        :param payload: Payload to publish
        :param request_meta: Originating request metadata, sent with the message
        :return: Empty response (Kafka assigns no retrievable id here)
        """
        self._connect()
        # Content is base64-encoded so the serializer can carry raw bytes.
        message = {
            '_is_payload': True,
            '_content': b64encode(payload.content),
            '_payload_meta': payload.payload_meta.extra_data,
            '_request_meta': request_meta,
        }
        self.producer.send(self.topic, helpers.dumps(message).encode())
        # Block until buffered messages reach the broker before returning.
        self.producer.flush()
        return ArchiverResponse()
Code example #11
0
    def archive(self, payload: Payload, request_meta: RequestMeta) -> ArchiverResponse:
        """
        Archive payload to GCS

        :param payload: Payload to write to the archive bucket
        :param request_meta: Originating request metadata (unused here)
        :return: Bucket, object, and project ids of the archived payload
        """

        if self.use_sha:
            # Content-addressed object name: the sha1 hex digest, sharded
            # into five single-character directories ('a/b/c/d/e/<sha1>')
            # so objects spread across name prefixes.
            filename = hashlib.sha1(payload.content).hexdigest()
            filename = f'{"/".join(list(filename[:5]))}/{filename}'
        elif self.use_datetime:
            datetime_path = datetime.now().strftime('%Y/%m/%d')
            filename = f'{datetime_path}/{payload.payload_id}'
        else:
            filename = payload.payload_id
        self._upload(payload.content, filename, self.archive_bucket)
        return ArchiverResponse(
            {
                'bucketId': self.archive_bucket,
                'objectId': filename,
                'projectId': self.project_id,
            }
        )
Code example #12
0
File: core.py  Project: chemberger/stoq
    def run(
        self,
        request_meta: Optional[RequestMeta] = None,
        add_start_dispatch: Optional[List[str]] = None,
        add_start_deep_dispatch: Optional[List[str]] = None,
    ) -> None:
        """

        Run stoQ using a provider plugin to scan multiple files until exhaustion

        :param request_meta: Metadata pertaining to the originating request
        :param add_start_dispatch: Force first round of scanning to use specified plugins
        :param add_start_deep_dispatch: Force second round of scanning to use specified plugins

        :raises StoqException: No provider plugins are loaded, or a provider
                               plugin failed while ingesting

        """
        # Don't initialize any (provider) plugins here! They should be
        # initialized on stoq start-up or via load_plugin()
        if not self._loaded_provider_plugins:
            raise StoqException('No activated provider plugins')
        payload_queue: queue.Queue = queue.Queue(self.max_queue)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Each provider's ingest() runs in its own thread and feeds
            # payload_queue; map each future back to its plugin name for
            # logging and error reporting.
            future_to_name = {
                executor.submit(plugin.ingest, payload_queue): name
                for name, plugin in self._loaded_provider_plugins.items()
            }
            # Keep draining while any provider is still running OR items
            # remain queued (providers may finish before the queue empties).
            while len(future_to_name) > 0 or payload_queue.qsize() > 0:
                try:
                    # Using get_nowait results in high CPU churn
                    task = payload_queue.get(timeout=0.1)
                    # Determine whether the provider has returned a `Payload`, or a task.
                    # If it is a task, load the defined archiver plugin to load the
                    # `Payload`, otherwise, simply continue on with the scanning.
                    if isinstance(task, Payload):
                        self.scan_payload(
                            task,
                            request_meta=request_meta,
                            add_start_dispatch=add_start_dispatch,
                            add_start_deep_dispatch=add_start_deep_dispatch,
                        )
                    else:
                        for source_archiver, task_meta in task.items():
                            try:
                                ar = ArchiverResponse(task_meta)
                                payload = self._loaded_source_archiver_plugins[
                                    source_archiver].get(ar)
                                if payload:
                                    self.scan_payload(
                                        payload,
                                        request_meta=request_meta,
                                        add_start_dispatch=add_start_dispatch,
                                        add_start_deep_dispatch=
                                        add_start_deep_dispatch,
                                    )
                            except Exception as e:
                                # A failing archiver only skips this task;
                                # remaining tasks must still be processed.
                                # (`warn` is a deprecated alias of `warning`.)
                                self.log.warning(
                                    f'"{task_meta}" failed with archiver "{source_archiver}": {str(e)}'
                                )
                except queue.Empty:
                    # Timed out waiting for work; fall through to reap any
                    # finished providers, then poll again.
                    pass
                # Reap completed providers. Iterate a snapshot list so
                # future_to_name can be mutated safely inside the loop.
                for future in [fut for fut in future_to_name if fut.done()]:
                    try:
                        future.result()
                        self.log.info(
                            f'Provider plugin {future_to_name[future]} successfully completed'
                        )
                        del future_to_name[future]
                    except Exception as e:
                        msg = f'provider:{future_to_name[future]} failed'
                        self.log.exception(msg)
                        raise StoqException(msg) from e
Code example #13
0
File: test_core.py  Project: ytreister/stoq
 def test_archiverresponse_to_str(self):
     """str() of an ArchiverResponse should be a valid JSON object."""
     response_str = str(ArchiverResponse())
     self.assertIsInstance(response_str, str)
     parsed = json.loads(response_str)
     self.assertIsInstance(parsed, dict)