コード例 #1
0
def test_no_pad_on_too_long():
    """A list already longer than the requested length must compare equal to the input."""
    # Ten entries, but only five are requested
    oversized = [{'mimetype': 'text/plain'}] * 10

    result = MetadataFilter.pad_metadata(oversized, 5)

    assert result == oversized
コード例 #2
0
def test_pad_fills_empty_to_length():
    """Padding an empty list to length two is expected to yield two empty dicts."""
    no_entries = []

    result = MetadataFilter.pad_metadata(no_entries, 2)

    assert result == [{}, {}]
コード例 #3
0
def test_no_pad_on_match_length():
    """A list whose length equals the requested length must compare equal to the input."""
    # Five entries, five requested
    exact_fit = [{'mimetype': 'text/plain'}] * 5

    result = MetadataFilter.pad_metadata(exact_fit, 5)

    assert result == exact_fit
コード例 #4
0
def test_pad_fills_with_none_on_invalid_metadata():
    """A single entry with only an 'asdf' key, padded to two, is expected to yield two empty dicts.

    Despite the test name, the expected placeholder asserted here is an
    empty dict, not ``None``.
    """
    unrecognised = [{'asdf': 'asdf'}]

    result = MetadataFilter.pad_metadata(unrecognised, 2)

    assert result == [{}, {}]
コード例 #5
0
def test_pad_fills_to_length():
    """A one-entry list padded to two keeps its entry and gains one empty dict."""
    single_entry = [{'mimetype': 'text/plain'}]

    result = MetadataFilter.pad_metadata(single_entry, 2)

    expected = [{'mimetype': 'text/plain'}, {}]
    assert result == expected
コード例 #6
0
    async def fetch_and_scan_all(self, guid, artifact_type, uri, duration,
                                 metadata, chain):
        """Fetch and scan all artifacts of a bounty concurrently.

        Args:
            guid (str): GUID of the associated bounty
            artifact_type (ArtifactType): Artifact type for the bounty being scanned
            uri (str): Base artifact URI
            duration (int): Max number of blocks to take (not used by this method)
            metadata (list[dict]): List of metadata json blobs for artifacts
            chain (str): Chain we are operating on

        Returns:
            list[ScanResult]: One ScanResult per artifact, in artifact index order
        """
        async def fetch_and_scan(artifact_metadata, index):
            # Consult the filter before downloading, so that rejected
            # artifacts never cost a network fetch.
            if not self.bounty_filter.is_allowed(artifact_metadata):
                return ScanResult()

            content = await self.client.get_artifact(uri, index)
            if content is None:
                # Nothing could be fetched for this index; report an empty result
                return ScanResult()

            try:
                result = await self.scan(
                    guid, artifact_type,
                    artifact_type.decode_content(content),
                    artifact_metadata, chain)
            except DecodeError:
                # Undecodable content is treated the same as "no result"
                result = ScanResult()

            if not result.bit:
                return ScanResult()

            # Only results that set the bit get their confidence adjusted
            result.confidence = self.confidence_modifier.modify(
                artifact_metadata, result.confidence)
            return result

        artifacts = await self.client.list_artifacts(uri)
        num_artifacts = len(artifacts)
        # Make sure every artifact index has a metadata entry to look up
        metadata = MetadataFilter.pad_metadata(metadata, num_artifacts)

        # gather() returns results in the order the awaitables were passed
        return await asyncio.gather(
            *(fetch_and_scan(metadata[i], i) for i in range(num_artifacts)))
コード例 #7
0
    async def scan(self, guid, artifact_type, uri, duration, metadata, chain):
        """Build one scan job per eligible artifact at the URI, hand the jobs to workers via Redis, and wait for the results

            Args:
                guid (str): GUID of the associated bounty
                artifact_type (ArtifactType): Artifact type for the bounty being scanned
                uri (str):  Base artifact URI
                duration (int): number of blocks until scan is due
                metadata (list[dict]) List of metadata json blobs for artifacts
                chain (str): Chain we are operating on

            Returns:
                list(ScanResult): List of ScanResult objects
            """
        # Reserve time for posting so one large artifact cannot eat the whole duration
        timeout = duration - self.time_to_post
        logger.info(f'Timeout set to {timeout}')
        loop = asyncio.get_event_loop()

        artifacts = await self.client.list_artifacts(uri)
        artifact_count = len(artifacts)
        # Pad metadata so that every artifact index has an entry
        metadata = MetadataFilter.pad_metadata(metadata, artifact_count)

        jobs = []
        try:
            for index in range(artifact_count):
                # Skip artifacts the bounty filter rejects (when a filter is configured)
                if self.bounty_filter is not None \
                        and not self.bounty_filter.is_allowed(metadata[index]):
                    continue
                # The rate limiter is only consulted for artifacts that passed the filter
                if self.rate_limiter is not None and not await self.rate_limiter.use():
                    continue

                artifact_uri = f'{self.client.polyswarmd_uri}/artifacts/{uri}/{index}/'
                jobs.append(
                    JobRequest(polyswarmd_uri=self.client.polyswarmd_uri,
                               guid=guid,
                               index=index,
                               uri=artifact_uri,
                               artifact_type=artifact_type.value,
                               duration=timeout,
                               metadata=metadata[index],
                               chain=chain,
                               ts=int(time.time())))

            if not jobs:
                # Nothing eligible to scan
                return []

            # Record how many jobs are being sent
            loop.create_task(self._update_job_counter(len(jobs)))

            # Ship the jobs to the backend as json strings
            serialized = [json.dumps(job.asdict()) for job in jobs]
            loop.create_task(self._send_jobs(serialized))

            # Register the jobs with the job processor, which resolves the future
            pending_results = Future()
            key = JOB_RESULTS_FORMAT.format(self.queue, guid, chain)
            await self.job_processor.register_jobs(guid, key, jobs,
                                                   pending_results)

            # Schedule expiry of the result key so old results age off
            loop.create_task(self._expire_key(key, duration + KEY_TIMEOUT))

            # Block until the job processor delivers the results
            return await pending_results
        except OSError:
            logger.exception('Redis connection down')
            await self.__reset_redis()
        except aioredis.errors.ReplyError:
            logger.exception('Redis out of memory')
            await self.__reset_redis()
        except aioredis.errors.ConnectionForcedCloseError:
            logger.exception('Redis connection closed')
            await self.__reset_redis()

        # A Redis failure was handled above; report no results
        return []