def __init__(self, client, testing=0, scanner=None, chains=None, artifact_types=None, bid_strategy=None,
                 accept=None, exclude=None):
        """Store the collaborators, hook the client events and reset the counters.

        Args:
            client: Client whose events this object subscribes to.
            testing (int): Stored as-is on ``self.testing``.
            scanner: Optional scanner, stored on ``self.scanner``.
            chains: Chains to operate on, stored on ``self.chains``.
            artifact_types: Supported artifact types; when None only
                ``ArtifactType.FILE`` is accepted.
            bid_strategy: Optional bid strategy, stored on ``self.bid_strategy``.
            accept: Forwarded to ``BountyFilter`` as its first argument.
            exclude: Forwarded to ``BountyFilter`` as its second argument.
        """
        self.client = client
        self.chains = chains
        self.scanner = scanner

        # Without an explicit list, only FILE artifacts are considered valid
        self.valid_artifact_types = [ArtifactType.FILE] if artifact_types is None else artifact_types

        self.bounty_filter = BountyFilter(accept, exclude)

        # Subscribe one handler per client event
        events = self.client
        events.on_run.register(self.__handle_run)
        events.on_new_bounty.register(self.__handle_new_bounty)
        events.on_reveal_assertion_due.register(self.__handle_reveal_assertion)
        events.on_quorum_reached.register(self.__handle_quorum_reached)
        events.on_settled_bounty.register(self.__handle_settled_bounty)
        events.on_settle_bounty_due.register(self.__handle_settle_bounty)

        self.bid_strategy = bid_strategy

        # Bookkeeping: pending bounties/locks start empty, counters start at zero
        self.testing = testing
        self.bounties_pending = {}
        self.bounties_pending_locks = {}
        self.bounties_seen = 0
        self.reveals_posted = 0
        self.settles_posted = 0
async def test_excluded():
    """An artifact whose mimetype is on the exclude list is not allowed."""
    # arrange: no accept list, one excluded mimetype
    excluded = [("mimetype", "text/plain")]
    candidate = {"mimetype": "text/plain"}
    # act
    verdict = BountyFilter(None, excluded).is_allowed(candidate)
    # assert
    assert not verdict
async def test_not_accepted():
    """An artifact whose mimetype is missing from the accept list is not allowed."""
    # arrange: one accepted mimetype, no exclude list
    accepted = [("mimetype", "text/plain")]
    candidate = {"mimetype": "text/html"}
    # act
    verdict = BountyFilter(accepted, None).is_allowed(candidate)
    # assert
    assert not verdict
async def test_scans_artifact_accepted_match_only_one():
    """Matching a single entry of the accept list is enough to be allowed."""
    # arrange: two accepted mimetypes, the candidate matches only the second
    accepted = [
        ("mimetype", "text/plain"),
        ("mimetype", "text/html"),
    ]
    candidate = {"mimetype": "text/html"}
    # act
    verdict = BountyFilter(accepted, None).is_allowed(candidate)
    # assert
    assert verdict
    async def fetch_and_scan_all(self, guid, artifact_type, uri, duration, metadata, chain):
        """Fetch and scan all artifacts concurrently

        Args:
            guid (str): GUID of the associated bounty
            artifact_type (ArtifactType): Artifact type for the bounty being scanned
            uri (str):  Base artifact URI
            duration (int): Max number of blocks to take (not used by this implementation)
            metadata (list[dict]): List of metadata json blobs for artifacts
            chain (str): Chain we are operating on

        Returns:
            list: One entry per artifact, in artifact order. Each entry is whatever `self.scan` returned,
                or a default `ScanResult()` when the artifact was filtered out or could not be fetched.
        """
        async def fetch_and_scan(artifact_metadata, index):
            # Consult the filter first so that rejected artifacts are never downloaded
            if not self.bounty_filter.is_allowed(artifact_metadata):
                return ScanResult()

            content = await self.client.get_artifact(uri, index)
            if content is None:
                return ScanResult()

            return await self.scan(guid, artifact_type, content, artifact_metadata, chain)

        artifacts = await self.client.list_artifacts(uri)
        # Pad so that every artifact index has a metadata entry to look up
        metadata = BountyFilter.pad_metadata(metadata, len(artifacts))

        # Iterate over artifact indices (not over metadata): only len(artifacts) entries are consumed
        return await asyncio.gather(*[
            fetch_and_scan(metadata[i], i) for i in range(len(artifacts))
        ])
async def test_pad_fills_empty_to_length():
    """Padding an empty metadata list to length 2 yields two empty dicts."""
    # arrange
    wanted_length = 2
    # act
    result = BountyFilter.pad_metadata([], wanted_length)
    # assert
    assert result == [{}, {}]
async def test_no_pad_on_too_long():
    """A metadata list longer than the requested length comes back equal to the input."""
    # arrange: ten entries, only five requested
    original = [{"mimetype": "text/plain"}] * 10
    # act
    result = BountyFilter.pad_metadata(original, 5)
    # assert
    assert result == original
async def test_no_pad_on_match_length():
    """A metadata list that already has the requested length comes back equal to the input."""
    # arrange: five entries, five requested
    original = [{"mimetype": "text/plain"}] * 5
    # act
    result = BountyFilter.pad_metadata(original, 5)
    # assert
    assert result == original
async def test_pad_fills_with_none_on_invalid_metadata():
    """A single entry with only an "asdf" key, padded to length 2, yields two empty dicts."""
    # arrange
    unrecognised = [{"asdf": "asdf"}]
    # act
    result = BountyFilter.pad_metadata(unrecognised, 2)
    # assert
    assert result == [{}, {}]
async def test_pad_fills_to_length():
    """A short metadata list keeps its entry and is extended with an empty dict."""
    # arrange
    existing = {"mimetype": "text/plain"}
    # act
    result = BountyFilter.pad_metadata([existing], 2)
    # assert
    assert result == [{"mimetype": "text/plain"}, {}]
class AbstractMicroengine(object):
    """Base microengine: scans the artifacts of new bounties and posts assertions on them.

    The constructor registers private handlers on the client's events. Subclasses customise
    behaviour by overriding `scan` and/or `bid`, or by passing a `scanner` / `bid_strategy`.
    """

    def __init__(self, client, testing=0, scanner=None, chains=None, artifact_types=None, bid_strategy=None,
                 accept=None, exclude=None):
        """Store collaborators, register the client event handlers and reset the counters.

        Args:
            client (Client): Client whose events this microengine handles.
            testing (int): Number of testing bounties to use; 0 disables testing mode.
            scanner (Scanner): `Scanner` instance used by the default `scan`.
            chains (set(str)): Set of chains you are acting on.
            artifact_types (list(ArtifactType)): Supported artifact types; defaults to `[ArtifactType.FILE]`.
            bid_strategy (BidStrategyBase): Bid strategy used by the default `bid`.
            accept: Passed to `BountyFilter` as its first argument.
            exclude: Passed to `BountyFilter` as its second argument.
        """
        self.client = client
        self.chains = chains
        self.scanner = scanner
        if artifact_types is None:
            self.valid_artifact_types = [ArtifactType.FILE]
        else:
            self.valid_artifact_types = artifact_types

        self.bounty_filter = BountyFilter(accept, exclude)

        self.client.on_run.register(self.__handle_run)
        self.client.on_new_bounty.register(self.__handle_new_bounty)
        self.client.on_reveal_assertion_due.register(self.__handle_reveal_assertion)
        self.client.on_quorum_reached.register(self.__handle_quorum_reached)
        self.client.on_settled_bounty.register(self.__handle_settled_bounty)
        self.client.on_settle_bounty_due.register(self.__handle_settle_bounty)

        self.bid_strategy = bid_strategy

        self.testing = testing
        # Per-chain set of bounty GUIDs that were seen and not yet settled
        self.bounties_pending = {}
        # Per-chain asyncio.Lock guarding bounties_pending; created in __handle_run
        self.bounties_pending_locks = {}
        self.bounties_seen = 0
        self.reveals_posted = 0
        self.settles_posted = 0

    @classmethod
    def connect(cls, polyswarmd_addr, keyfile, password, api_key=None, testing=0, insecure_transport=False,
                scanner=None, chains=None, artifact_types=None, bid_strategy=None, accept=None, exclude=None):
        """Connect the Microengine to a Client.

        Args:
            polyswarmd_addr (str): URL of polyswarmd you are referring to.
            keyfile (str): Keyfile filename.
            password (str): Password associated with Keyfile.
            api_key (str): Your PolySwarm API key.
            testing (int): Number of testing bounties to use.
            insecure_transport (bool): Allow insecure transport such as HTTP?
            scanner (Scanner): `Scanner` instance to use.
            chains (set(str)):  Set of chains you are acting on.
            artifact_types (list(ArtifactType)): List of artifact types you support
            bid_strategy (BidStrategyBase): Bid Strategy for bounties
            accept (list[tuple[str]]): List of accepted mimetypes
            exclude (list[tuple[str]]): List of excluded mimetypes

        Returns:
            AbstractMicroengine: Microengine instantiated with a Client.
        """
        client = Client(polyswarmd_addr, keyfile, password, api_key, testing > 0, insecure_transport)
        return cls(client, testing, scanner, chains, artifact_types, bid_strategy=bid_strategy,
                   accept=accept, exclude=exclude)

    async def scan(self, guid, artifact_type, content, metadata, chain):
        """Override this to implement custom scanning logic

        Args:
            guid (str): GUID of the bounty under analysis, use to track artifacts in the same bounty
            artifact_type (ArtifactType): Artifact type for the bounty being scanned
            content (bytes): Content of the artifact to be scan
            metadata (dict): Metadata about the artifact being scanned
            chain (str): Chain we are operating on
        Returns:
            ScanResult: Result of this scan
        Raises:
            NotImplementedError: If no scanner was provided and this method was not overridden
        """
        if self.scanner:
            return await self.scanner.scan(guid, artifact_type, content, metadata, chain)

        raise NotImplementedError(
            "You must 1) override this scan method OR 2) provide a scanner to your Microengine constructor")

    async def bid(self, guid, mask, verdicts, confidences, metadatas, chain):
        """Override this to implement custom bid calculation logic

        Args:
            guid (str): GUID of the bounty under analysis
            mask (list[bool]): mask for the from scanning the bounty files
            verdicts (list[bool]): scan verdicts from scanning the bounty files
            confidences (list[float]): Measure of confidence of verdict per artifact ranging from 0.0 to 1.0
            metadatas (list[str]): metadata blurbs from scanning the bounty files
            chain (str): Chain we are operating on

        Returns:
            int: Amount of NCT to bid in base NCT units (10 ^ -18)
        Raises:
            NotImplementedError: If no bid_strategy was provided and this method was not overridden
        """
        min_allowed_bid = await self.client.bounties.parameters[chain].get('assertion_bid_minimum')
        if self.bid_strategy is not None:
            # Never bid below the chain's minimum, whatever the strategy suggests
            return max(
                min_allowed_bid,
                await self.bid_strategy.bid(guid, mask, verdicts, confidences, metadatas, min_allowed_bid, chain)
            )

        raise NotImplementedError(
            "You must 1) override this bid method OR 2) provide a bid_strategy to your Microengine constructor")

    async def fetch_and_scan_all(self, guid, artifact_type, uri, duration, metadata, chain):
        """Fetch and scan all artifacts concurrently

        Args:
            guid (str): GUID of the associated bounty
            artifact_type (ArtifactType): Artifact type for the bounty being scanned
            uri (str):  Base artifact URI
            duration (int): Max number of blocks to take (not used by this implementation)
            metadata (list[dict]): List of metadata json blobs for artifacts
            chain (str): Chain we are operating on

        Returns:
            list: One entry per artifact, in artifact order. Each entry is whatever `self.scan` returned,
                or a default `ScanResult()` when the artifact was filtered out or could not be fetched.
        """
        async def fetch_and_scan(artifact_metadata, index):
            # Consult the filter first so that rejected artifacts are never downloaded
            if not self.bounty_filter.is_allowed(artifact_metadata):
                return ScanResult()

            content = await self.client.get_artifact(uri, index)
            if content is None:
                return ScanResult()

            return await self.scan(guid, artifact_type, content, artifact_metadata, chain)

        artifacts = await self.client.list_artifacts(uri)
        # Pad so that every artifact index has a metadata entry to look up
        metadata = BountyFilter.pad_metadata(metadata, len(artifacts))

        # Iterate over artifact indices (not over metadata): only len(artifacts) entries are consumed
        return await asyncio.gather(*[
            fetch_and_scan(metadata[i], i) for i in range(len(artifacts))
        ])

    def run(self):
        """
        Run the `Client` on the Microengine's chains.
        """
        self.client.run(self.chains)

    async def __handle_run(self, chain):
        """Perform setup required once on correct loop

        Args:
            chain (str): Chain we are operating on.
        """
        self.bounties_pending_locks[chain] = asyncio.Lock()
        if self.scanner is not None and not await self.scanner.setup():
            logger.critical('Scanner instance reported unsuccessful setup. Exiting.')
            # The `exit` helper is intended for interactive sessions; raise SystemExit directly
            raise SystemExit(1)

    async def __handle_new_bounty(self, guid, artifact_type, author, amount, uri, expiration, metadata, block_number, txhash, chain):
        """Scan and assert on a posted bounty

        Args:
            guid (str): The bounty to assert on
            artifact_type (ArtifactType): The type of artifacts in this bounty
            author (str): The bounty author
            amount (str): Amount of the bounty in base NCT units (10 ^ -18)
            uri (str): IPFS hash of the root artifact
            expiration (str): Block number of the bounty's expiration
            metadata (dict): Dictionary of metadata or None
                (NOTE(review): forwarded unchanged to `fetch_and_scan_all`, which documents a list of dicts - confirm)
            block_number (int): Block number the bounty was placed on
            txhash (str): Transaction hash which caused the event
            chain (str): Is this on the home or side chain?

        Returns:
            Response JSON parsed from polyswarmd containing placed assertions
        """
        # Skip bounties for types we don't support
        if artifact_type not in self.valid_artifact_types:
            logger.info('Bounty artifact type %s is not supported', artifact_type)
            return []

        # Record the bounty as pending; a GUID that is already pending is a duplicate event
        async with self.bounties_pending_locks[chain]:
            bounties_pending = self.bounties_pending.get(chain, set())
            if guid in bounties_pending:
                logger.debug(f'Bounty {guid} already seen, not responding')
                return []
            self.bounties_pending[chain] = bounties_pending | {guid}

        self.bounties_seen += 1
        if self.testing > 0:
            if self.bounties_seen > self.testing:
                logger.warning('Received new bounty, but finished with testing mode')
                return []
            logger.info(f'Testing mode, {self.testing - self.bounties_seen} bounties remaining')

        expiration = int(expiration)
        duration = expiration - block_number

        results = await self.fetch_and_scan_all(guid, artifact_type, uri, duration, metadata, chain)
        mask = [r.bit for r in results]

        # No artifact produced an assertion bit: nothing to post, so skip all metadata handling
        if not any(mask):
            return []

        verdicts = [r.verdict for r in results]
        confidences = [r.confidence for r in results]
        metadatas = [r.metadata for r in results]

        # Default to a ';'-joined string; upgraded to a JSON list below when every entry validates
        combined_metadata = ';'.join(metadatas)

        try:
            if all([blob and verdict.Verdict.validate(json.loads(blob)) for blob in metadatas]):
                combined_metadata = json.dumps([json.loads(blob) for blob in metadatas])
        except json.JSONDecodeError:
            logger.exception(f'Error decoding assertion metadata {metadatas}')

        assertion_fee = await self.client.bounties.parameters[chain].get('assertion_fee')
        assertion_reveal_window = await self.client.bounties.parameters[chain].get('assertion_reveal_window')
        arbiter_vote_window = await self.client.bounties.parameters[chain].get('arbiter_vote_window')

        # Check that microengine has sufficient balance to handle the assertion
        bid = await self.bid(guid, mask, verdicts, confidences, metadatas, chain)
        balance = await self.client.balances.get_nct_balance(chain)
        if balance < assertion_fee + bid:
            logger.critical(f'Insufficient balance to post assertion for bounty on {chain}. Have {balance} NCT. '
                            f'Need {assertion_fee + bid} NCT', extra={'extra': guid})
            if self.testing > 0:
                # The `exit` helper is intended for interactive sessions; raise SystemExit directly
                raise SystemExit(1)

            return []

        logger.info(f'Responding to {artifact_type.name.lower()} bounty {guid}')
        nonce, assertions = await self.client.bounties.post_assertion(guid, bid, mask, verdicts, chain)
        for a in assertions:
            # Post metadata to IPFS and post ipfs_hash as metadata, if it exists
            ipfs_hash = await self.client.bounties.post_metadata(combined_metadata, chain)
            reveal_metadata = ipfs_hash if ipfs_hash is not None else combined_metadata
            ra = RevealAssertion(guid, a['index'], nonce, verdicts, reveal_metadata)
            self.client.schedule(expiration, ra, chain)

            # Settlement is scheduled after both the reveal window and the arbiter vote window
            sb = SettleBounty(guid)
            self.client.schedule(expiration + assertion_reveal_window + arbiter_vote_window, sb, chain)

        return assertions

    async def __handle_reveal_assertion(self, bounty_guid, index, nonce, verdicts, metadata, chain):
        """
        Callback registered in `__init__` to handle the reveal assertion.

        Args:
            bounty_guid (str): GUID of the bounty being asserted on
            index (int): Index of the assertion to reveal
            nonce (str): Secret nonce used to reveal assertion
            verdicts (List[bool]): List of verdicts for each artifact in the bounty
            metadata (str): Optional metadata
            chain (str): Chain to operate on
        Returns:
            Response JSON parsed from polyswarmd containing emitted events
        """
        self.reveals_posted += 1
        if self.testing > 0:
            if self.reveals_posted > self.testing:
                logger.warning('Scheduled reveal, but finished with testing mode')
                return []
            logger.info(f'Testing mode, {self.testing - self.reveals_posted} reveals remaining')
        return await self.client.bounties.post_reveal(bounty_guid, index, nonce, verdicts, metadata, chain)

    async def __do_handle_settle_bounty(self, bounty_guid, chain):
        """
        Shared implementation behind the quorum-reached, settle-due and settled-bounty callbacks.

        Settles the bounty only if it is still pending on this chain, so whichever of the
        callbacks fires first does the work and the later ones return early.

        Args:
            bounty_guid (str): GUID of the bounty being asserted on
            chain (str): Chain to operate on
        Returns:
            Response JSON parsed from polyswarmd containing emitted events
        """
        async with self.bounties_pending_locks[chain]:
            bounties_pending = self.bounties_pending.get(chain, set())
            if bounty_guid not in bounties_pending:
                logger.debug(f'Bounty {bounty_guid} already settled')
                return []
            self.bounties_pending[chain] = bounties_pending - {bounty_guid}

        self.settles_posted += 1
        if self.testing > 0:
            if self.settles_posted > self.testing:
                logger.warning('Scheduled settle, but finished with testing mode')
                return []
            logger.info(f'Testing mode, {self.testing - self.settles_posted} settles remaining')

        ret = await self.client.bounties.settle_bounty(bounty_guid, chain)
        # In testing mode, stop once the configured number of settles has been posted
        if 0 < self.testing <= self.settles_posted:
            logger.info("All testing bounties complete, exiting")
            asyncio_stop()
        return ret

    async def __handle_quorum_reached(self, bounty_guid, block_number, txhash, chain):
        """Callback registered in `__init__` for quorum-reached events; delegates to the shared settle logic."""
        return await self.__do_handle_settle_bounty(bounty_guid, chain)

    async def __handle_settle_bounty(self, bounty_guid, chain):
        """Callback registered in `__init__` for settle-due events; delegates to the shared settle logic."""
        return await self.__do_handle_settle_bounty(bounty_guid, chain)

    async def __handle_settled_bounty(self, bounty_guid, settler, payout, block_number, txhash, chain):
        """Callback registered in `__init__` for settled-bounty events; delegates to the shared settle logic."""
        return await self.__do_handle_settle_bounty(bounty_guid, chain)
Esempio n. 12
0
    async def scan(self, guid, artifact_type, uri, expiration_blocks, metadata,
                   chain):
        """Creates a set of jobs to scan all the artifacts at the given URI that are passed via Redis to workers

        Args:
            guid (str): GUID of the associated bounty
            artifact_type (ArtifactType): Artifact type for the bounty being scanned
            uri (str):  Base artifact URI
            expiration_blocks (int): Blocks until vote round ends
            metadata (list[dict]): List of metadata json blobs for artifacts
            chain (str): Chain we are operating on

        Returns:
            list(ScanResult): One ScanResult per artifact, in artifact order, with a default
                ScanResult() for artifacts that were filtered out or yielded no usable result.
                An empty list if no job was queued or a Redis error occurred.
        """
        # Ensure we don't wait past the vote round duration for one long artifact
        timeout = expiration_blocks - self.time_to_post
        logger.info(f' timeout set to {timeout}')

        async def wait_for_result(result_key):
            """Pop one worker result; returns (index, ScanResult), or None on timeout or error."""
            remaining = KEY_TIMEOUT
            try:
                with await self.redis as redis:
                    while True:
                        # Block for at most one second per attempt. A timeout of 0 tells BLPOP to
                        # block indefinitely, which would make the countdown below unreachable
                        # and hang forever if a worker never reports back.
                        result = await redis.blpop(result_key, timeout=1)

                        if result:
                            break

                        remaining -= 1
                        if remaining <= 0:
                            logger.critical(
                                'Timeout waiting for result in bounty %s',
                                guid)
                            return None

                    # result is indexed as (key, value); the value holds the worker's JSON payload
                    j = json.loads(result[1].decode('utf-8'))

                    # increase perf counter for autoscaling
                    q_counter = f'{self.queue}_scan_result_counter'
                    await redis.incr(q_counter)

                    return j['index'], ScanResult(bit=j['bit'],
                                                  verdict=j['verdict'],
                                                  confidence=j['confidence'],
                                                  metadata=j['metadata'])
            except aioredis.errors.ReplyError:
                logger.exception('Redis out of memory')
            except OSError:
                logger.exception('Redis connection down')
            except (AttributeError, ValueError, KeyError):
                logger.error('Received invalid response from worker')

            # Every failure path yields None so the caller can skip this result
            return None

        num_artifacts = len(await self.client.list_artifacts(uri))
        # Fill out metadata to match same number of artifacts
        metadata = BountyFilter.pad_metadata(metadata, num_artifacts)

        # One JSON job per artifact that passes the filter (no filter means everything passes)
        jobs = []
        for i in range(num_artifacts):
            if self.bounty_filter is None or self.bounty_filter.is_allowed(
                    metadata[i]):
                jobs.append(
                    json.dumps({
                        'ts': time.time() // 1,
                        'guid': guid,
                        'artifact_type': artifact_type.value,
                        'uri': uri,
                        'index': i,
                        'chain': chain,
                        'duration': timeout,
                        'polyswarmd_uri': self.client.polyswarmd_uri,
                        'metadata': metadata[i]
                    }))

        if jobs:
            try:
                await self.redis.rpush(self.queue, *jobs)

                # All results for this bounty/chain arrive on one list; wait for one per queued job
                key = '{}_{}_{}_results'.format(self.queue, guid, chain)
                results = await asyncio.gather(
                    *[wait_for_result(key) for _ in jobs])
                results = {r[0]: r[1] for r in results if r is not None}

                # Age off old result keys
                await self.redis.expire(key, KEY_TIMEOUT)

                return [
                    results.get(i, ScanResult()) for i in range(num_artifacts)
                ]
            except OSError:
                logger.exception('Redis connection down')
            except aioredis.errors.ReplyError:
                logger.exception('Redis out of memory')

        return []