async def test_pad_fills_empty_to_length():
    """Padding an empty metadata list yields the requested number of empty dicts."""
    # arrange: no metadata supplied at all
    empty_metadata = []
    # act
    result = BountyFilter.pad_metadata(empty_metadata, 2)
    # assert: filled entirely with empty dicts
    assert result == [{}, {}]
    async def fetch_and_scan_all(self, guid, artifact_type, uri, duration, metadata, chain):
        """Fetch and scan all artifacts concurrently.

        Args:
            guid (str): GUID of the associated bounty
            artifact_type (ArtifactType): Artifact type for the bounty being scanned
            uri (str): Base artifact URI
            duration (int): Max number of blocks to take
            metadata (list[dict]): List of metadata json blobs for artifacts
            chain (str): Chain we are operating on

        Returns:
            list(ScanResult): One ScanResult per artifact, in artifact order
        """
        async def fetch_and_scan(artifact_metadata, index):
            # Apply the bounty filter BEFORE fetching content so that
            # disallowed artifacts never cost a network round trip.
            # (Original fetched first, then discarded the content.)
            if not self.bounty_filter.is_allowed(artifact_metadata):
                return ScanResult()

            content = await self.client.get_artifact(uri, index)
            if content is None:
                # Artifact could not be fetched; report an empty result
                return ScanResult()

            return await self.scan(guid, artifact_type, content, artifact_metadata, chain)

        artifacts = await self.client.list_artifacts(uri)
        # Pad metadata so every artifact index has a (possibly empty) blob
        metadata = BountyFilter.pad_metadata(metadata, len(artifacts))

        return await asyncio.gather(*[
            fetch_and_scan(metadata[i], i) for i in range(len(artifacts))
        ])
async def test_no_pad_on_too_long():
    """Padding never truncates: a list longer than the target is returned as-is."""
    # arrange: more entries than the requested length
    original = [{"mimetype": "text/plain"} for _ in range(10)]
    # act
    result = BountyFilter.pad_metadata(original, 5)
    # assert: unchanged
    assert result == original
async def test_no_pad_on_match_length():
    """A list already at the target length is returned unchanged."""
    # arrange: exactly the requested number of entries
    original = [{"mimetype": "text/plain"} for _ in range(5)]
    # act
    result = BountyFilter.pad_metadata(original, 5)
    # assert: unchanged
    assert result == original
async def test_pad_fills_with_none_on_invalid_metadata():
    """Invalid metadata entries are replaced with empty dicts while padding."""
    # arrange: one entry with an unrecognized key
    bogus = [{"asdf": "asdf"}]
    # act
    result = BountyFilter.pad_metadata(bogus, 2)
    # assert: invalid entry wiped, list padded to length
    assert result == [{}, {}]
async def test_pad_fills_to_length():
    """A short list of valid metadata is kept and padded with empty dicts."""
    # arrange: one valid entry, target length of two
    original = [{"mimetype": "text/plain"}]
    # act
    result = BountyFilter.pad_metadata(original, 2)
    # assert: original entry preserved, empty dict appended
    assert result == [{"mimetype": "text/plain"}, {}]
# Example #7
    async def scan(self, guid, artifact_type, uri, expiration_blocks, metadata,
                   chain):
        """Creates a set of jobs to scan all the artifacts at the given URI that are passed via Redis to workers

            Args:
                guid (str): GUID of the associated bounty
                artifact_type (ArtifactType): Artifact type for the bounty being scanned
                uri (str):  Base artifact URI
                expiration_blocks (int): Blocks until vote round ends
                metadata (list[dict]): List of metadata json blobs for artifacts
                chain (str): Chain we are operating on

            Returns:
                list(ScanResult): List of ScanResult objects
            """
        # Ensure we don't wait past the vote round duration for one long artifact
        timeout = expiration_blocks - self.time_to_post
        logger.info(f' timeout set to {timeout}')

        async def wait_for_result(result_key):
            """Wait up to ~KEY_TIMEOUT seconds for one worker result.

            Returns:
                (index, ScanResult) on success, None on timeout or bad response.
            """
            remaining = KEY_TIMEOUT
            try:
                with await self.redis as redis:
                    while True:
                        # BUG FIX: the original used timeout=0, which means
                        # "block forever" in Redis BLPOP, making the countdown
                        # below unreachable. Wait at most 1s per iteration so
                        # the KEY_TIMEOUT budget is actually enforced (the old
                        # asyncio.sleep(1) is dropped since blpop now provides
                        # the 1-second pacing).
                        result = await redis.blpop(result_key, timeout=1)

                        if result:
                            break

                        if remaining <= 0:
                            logger.critical(
                                'Timeout waiting for result in bounty %s',
                                guid)
                            return None

                        remaining -= 1

                    j = json.loads(result[1].decode('utf-8'))

                    # increase perf counter for autoscaling
                    q_counter = f'{self.queue}_scan_result_counter'
                    await redis.incr(q_counter)

                    return j['index'], ScanResult(bit=j['bit'],
                                                  verdict=j['verdict'],
                                                  confidence=j['confidence'],
                                                  metadata=j['metadata'])
            except aioredis.errors.ReplyError:
                logger.exception('Redis out of memory')
            except OSError:
                logger.exception('Redis connection down')
            except (AttributeError, ValueError, KeyError):
                # Worker pushed a malformed/incomplete JSON payload
                logger.error('Received invalid response from worker')
                return None

        num_artifacts = len(await self.client.list_artifacts(uri))
        # Fill out metadata to match same number of artifacts
        metadata = BountyFilter.pad_metadata(metadata, num_artifacts)

        jobs = []
        for i in range(num_artifacts):
            # bounty_filter may be unset (None); treat that as allow-all
            if self.bounty_filter is None or self.bounty_filter.is_allowed(
                    metadata[i]):
                jobs.append(
                    json.dumps({
                        'ts': time.time() // 1,  # whole-second timestamp
                        'guid': guid,
                        'artifact_type': artifact_type.value,
                        'uri': uri,
                        'index': i,
                        'chain': chain,
                        'duration': timeout,
                        'polyswarmd_uri': self.client.polyswarmd_uri,
                        'metadata': metadata[i]
                    }))

        if jobs:
            try:
                await self.redis.rpush(self.queue, *jobs)

                key = '{}_{}_{}_results'.format(self.queue, guid, chain)
                results = await asyncio.gather(
                    *[wait_for_result(key) for _ in jobs])
                # Drop timed-out/error entries; index results by artifact position
                results = {r[0]: r[1] for r in results if r is not None}

                # Age off old result keys
                await self.redis.expire(key, KEY_TIMEOUT)

                # Filtered-out or missing artifacts get an empty ScanResult
                return [
                    results.get(i, ScanResult()) for i in range(num_artifacts)
                ]
            except OSError:
                logger.exception('Redis connection down')
            except aioredis.errors.ReplyError:
                logger.exception('Redis out of memory')

        return []