Code example #1
0
class Broker(Thread, Singleton, WorkerNodeDelegate, ProcessorDelegate):
    """
    Broker manages all underlying services/threads and arranges communications between them. Broker directly manages
    WebAPI and Ethereum threads and provides delegate interfaces for capturing their output via callback functions.
    This is done via implementing `EthDelegate` and `WebDelegate` abstract classes.
    """

    # ----------------------------------------------------------------------------------------------------------
    # Initialization
    # ----------------------------------------------------------------------------------------------------------
    def __init__(self, eth_server: str, abi_path: str, pandora: str, node: str,
                 ipfs_server: str, ipfs_port: int, data_dir: str):
        """
        Store connection settings and create empty containers for contracts,
        state machines, jobs and processors.  No network activity happens here;
        actual connections are established later in `connect()`.

        :param eth_server: Ethereum node URL (kept for later use)
        :param abi_path: path to contract ABI definitions
        :param pandora: Pandora contract address
        :param node: worker node contract address
        :param ipfs_server: IPFS gateway host
        :param ipfs_port: IPFS gateway port
        :param data_dir: local directory used for downloaded kernel/dataset files
        """
        # NOTE(review): the return value of get_instance() is discarded;
        # presumably this call registers this object as the singleton
        # instance -- confirm against the Singleton base class.
        Broker.get_instance()
        Thread.__init__(self, daemon=True)

        # Initializing logger object
        self.logger = logging.getLogger("Broker")
        self.logger.addHandler(LogSocketHandler.get_instance())
        self.manager = Manager.get_instance()
        # Launch mode; "0" (production) makes connect() join listener threads.
        self.mode = self.manager.launch_mode

        # Saving starter configs
        self.eth_server = eth_server
        self.abi_path = abi_path
        self.pandora = pandora
        self.node = node
        self.ipfs_server = ipfs_server
        self.ipfs_port = ipfs_port
        self.data_dir = data_dir

        # Init empty container for pandora
        self.pandora_container = None

        # Init empty containers for worker node
        self.worker_node_container = None
        self.worker_node_state_machine = None
        self.worker_node_event_thread = None

        # Init empty containers for job
        self.job_address = None
        self.job_container = None
        self.job_state_machine = None
        self.job_state_event_thread = None

        # Init empty jobs and processor
        # jobs: job contract address -> contract container
        # processors: "<node>:<job_address>" -> Processor instance
        self.jobs = {}
        self.processors = {}

        # init connectors
        self.eth = EthService(strategic=EthConnector())
        self.ipfs = IpfsService(strategic=IpfsConnector())

        # Password recovered from the account vault in connect()
        self.local_password = None
        self.key_tool = KeyTools()
        print('Pandora broker initialize success')

# ----------------------------------------------------------------------------------------------------------
# Base connection and start pynode
# ----------------------------------------------------------------------------------------------------------

    def connect(self) -> bool:
        """
        Bring the node online: instantiate the Pandora and worker node
        contracts, unlock the local account vault, start the worker node
        state machine with its event-listener thread, and finally start
        the broker thread itself.

        :return: True on successful startup; False when the vault cannot be
                 located/unlocked or the eth connector was never created.

        NOTE(review): in production mode (`self.mode == "0"`) this method
        blocks indefinitely by joining the listener threads before returning.
        """
        if self.eth is not None:
            # init base contracts containers
            self.pandora_container = self.eth.init_contract(
                server_address=self.manager.eth_host,
                contract_address=self.pandora,
                contract_abi=self.manager.eth_pandora_contract)
            self.logger.info(
                'Pandora contract initialized success on address : ' +
                self.pandora)
            self.worker_node_container = self.eth.init_contract(
                server_address=self.manager.eth_host,
                contract_address=self.node,
                contract_abi=self.manager.eth_worker_contract)
            self.logger.info(
                'Worker contract initialized success on address : ' +
                self.node)

            # init worker contract owner account
            if self.key_tool.check_vault():
                self.logger.info('Account vault is located')
                vault_data = self.key_tool.obtain_key(self.manager.vault_key)
                # split data to pass and p_key
                # NOTE(review): both local_password and vault_account read
                # field [0] of the same split -- verify the vault format is
                # really "<account>_<private_key>" and whether the password
                # is intentionally identical to the account field.
                self.local_password = vault_data.split("_", 1)[0]
                vault_account = vault_data.split("_", 1)[0]
                local_p_key = vault_data.split("_", 1)[1]
                # NOTE(review): substring containment (`in`) instead of
                # equality, and `is not ''` identity comparison instead of
                # `!= ''` -- both look unintended; confirm before changing.
                if (vault_account.lower()
                        in self.manager.eth_worker_node_account.lower()) and (
                            local_p_key is not ''):
                    self.logger.info('Vault check success')
                else:
                    self.logger.info('Unable to unlock account vault.')
                    self.logger.info('Please provide pynode configuration.')
                    return False
            else:
                self.logger.info('Unable to locate account vault.')
                self.logger.info('Please provide pynode configuration.')
                return False

            self.logger.info('Worker account determination success')
            # init worker node state machine and get current state
            self.worker_node_state_machine = WorkerNode(
                contract_container=self.worker_node_container,
                delegate=self,
                address=self.node,
                contract=self.manager.eth_worker_contract)
            # bind worker node states listener thread
            filter_on_worker = self.worker_node_container.events.StateChanged.createFilter(
                fromBlock='latest')
            self.worker_node_event_thread = Thread(
                target=self.worker_filter_thread_loop,
                args=(filter_on_worker, 5),
                daemon=True)
            self.worker_node_event_thread.start()
            status = self.worker_node_event_thread.is_alive()
            self.logger.info(
                'Event listener for worker node creation startup success, alive : '
                + str(status))
            self.logger.info(
                'Worker node state event thread listener initialize success')

            # process current worker node state after worker node event thread is initialized
            current_worker_node_state = self.worker_node_state_machine.process_state(
            )
            self.logger.info(
                'Worker node state machine initialized success with state : ' +
                str(current_worker_node_state))

            # start main broker thread
            super().start()
            self.logger.info("Broker started successfully")
            # ----------------------------------------------------------------------------------
            # JOIN WORKER EVENTS CHANGE LISTENER to main process
            # ----------------------------------------------------------------------------------
            if self.mode == "0":  # join threads only in production mode
                self.worker_node_event_thread.join()
                if self.job_state_event_thread:
                    self.job_state_event_thread.join()
            return True
        else:
            self.logger.info('Pynode eth connector not instantiated. exit')
            return False

    def disconnect(self):
        """Block until the broker thread terminates (plain Thread.join)."""
        super().join()

# ----------------------------------------------------------------------------------------------------------
# JOB and PROCESSOR initialization
# ----------------------------------------------------------------------------------------------------------
# todo job address is necessary ADD it to method call parameters

    def init_cognitive_job(self) -> bool:
        """
        Instantiate the cognitive job contract at `self.job_address`, build
        its state machine, register it in `self.jobs`, and start the job
        state-change listener thread.

        :return: always True (failures would surface as exceptions from the
                 underlying eth/web3 calls).

        Precondition: `self.job_address` must already be set by the caller.
        """
        self.manager.job_contract_address = self.job_address
        self.job_container = self.eth.init_contract(
            server_address=self.manager.eth_host,
            contract_address=self.job_address,
            contract_abi=self.manager.eth_cognitive_job_contract)
        self.logger.info('Job contract initialized success on address : ' +
                         self.job_address)
        self.job_state_machine = CognitiveJob(
            contract_container=self.job_container,
            delegate=self,
            address=self.job_address,
            contract=self.manager.eth_cognitive_job_contract)
        current_job_state = self.job_state_machine.process_state()
        self.logger.info(
            'Cognition job state machine initialized success with state : ' +
            str(current_job_state))
        self.jobs[self.job_address] = self.job_container

        # Listen for StateChanged events on the job contract (poll every 7s).
        filter_on_job = self.job_container.events.StateChanged.createFilter(
            fromBlock='latest')
        self.job_state_event_thread = Thread(
            target=self.job_filter_thread_loop,
            args=(filter_on_job, 7),
            daemon=True)
        self.job_state_event_thread.start()
        status = self.job_state_event_thread.is_alive()
        self.logger.info(
            'Event listener for job states creation startup success, alive : '
            + str(status))
        self.logger.info(
            'Cognitive job state event thread listener initialize success')
        return True

    # TODO for multiprocess cognition need to rebuild processor init flow
    def init_processor(self) -> Processor:
        """
        Prepare a Processor for the current job: resolve the kernel and
        dataset contracts, download their IPFS payloads, determine this
        node's batch index among the job's active workers, and start the
        processor.

        :return: the prepared Processor on success.

        NOTE(review): on failure to read the job contract this returns
        False, which contradicts the `-> Processor` annotation; callers in
        start_validating/start_computing rely on exceptions instead --
        consider unifying the error contract.
        :raises Exception: when this node is not among the job's workers.
        """
        # get kernel and dataset
        # prepare processor for calculating data
        if self.ipfs is not None:
            # init job container if empty
            if not self.job_container:
                self.job_address = self.worker_node_container.call().activeJob(
                )
                self.init_cognitive_job()

            try:
                kernel_address = self.job_container.call().kernel()
                dataset_address = self.job_container.call().dataset()
            except Exception as ex:
                self.logger.error(
                    "Exception initializing job internal contract")
                self.logger.error(ex.args)
                return False

            self.logger.info('Start determinate kernel and dataset contracts')
            try:
                kernel_container = self.eth.init_contract(
                    server_address=self.manager.eth_host,
                    contract_address=kernel_address,
                    contract_abi=self.manager.eth_kernel_contract)
                self.logger.info('Kernel contract instantiated success')
                dataset_container = self.eth.init_contract(
                    server_address=self.manager.eth_host,
                    contract_address=dataset_address,
                    contract_abi=self.manager.eth_dataset_contract)
                self.logger.info('Dataset contract instantiated success')
            except Exception as ex:
                # NOTE(review): exception is logged but execution continues;
                # kernel_container/dataset_container may be unbound below,
                # turning this into a NameError -- likely a missing `return`.
                self.logger.error("Exception contract initializing")
                self.logger.error(ex.args)

            # get kernel and dataset addresses
            kernel_ipfs_address = kernel_container.call().ipfsAddress()
            dataset_ipfs_address = dataset_container.call().ipfsAddress()
            self.logger.info('Kernel ipfs address : ' +
                             str(kernel_ipfs_address))
            self.logger.info('Dataset ipfs address : ' +
                             str(dataset_ipfs_address))

            # determinate batch for current job
            # This node's batch index is its position in the job's
            # activeWorkers list (compared case-insensitively).
            job = self.jobs[self.job_address]
            workers = []
            workers_count = job.call().activeWorkersCount()
            for w in range(0, workers_count):
                workers.append(job.call().activeWorkers(w).lower())
            batch = None
            for idx, w in enumerate(workers):
                if self.node.lower() == w.lower():
                    batch = idx
                    self.logger.info('BATCH_INDEX : ' + str(batch))
                    break
            if batch is None:
                raise Exception("Can't determine this node batch number")
            # prepare ipfs
            self.logger.info('Start loading files data...')
            self.ipfs.connect(server=self.ipfs_server,
                              port=self.ipfs_port,
                              data_dir=self.data_dir)
            self.logger.info('IPFS connection instantiated success')
            # load kernel and dataset root files
            # ipfsAddress() returns bytes; decode to a usable hash string.
            self.ipfs.download_file(kernel_ipfs_address.decode("utf-8"))
            self.logger.info('Kernel datafile download success...')
            self.ipfs.download_file(dataset_ipfs_address.decode("utf-8"))
            self.logger.info('Dataset datafile download success...')

            processor_id = '%s:%s' % (self.node, self.job_address)
            # processor initialization
            processor = Processor(ipfs_api=self.ipfs,
                                  processor_id=processor_id,
                                  delegate=self)
            self.processors[processor_id] = processor
            processor.run()
            # NOTE(review): read_file() is given the raw bytes address here,
            # while download_file above used the decoded string -- confirm
            # open() accepts it (works on some platforms via os.fsdecode).
            processor.prepare(
                kernel_file=self.read_file(kernel_ipfs_address),
                dataset_file=self.read_file(dataset_ipfs_address),
                batch=batch)
            return processor

    @staticmethod
    def read_file(file_address) -> dict:
        """Load and return the JSON document stored at local path `file_address`."""
        with open(file_address) as json_file:
            info = json.load(json_file)
        return info

    @staticmethod
    def check_job_address(job_address: str) -> str:
        """
        Return `job_address` unchanged unless it is the all-zero address
        ('0x000...0', i.e. only 'x' remains once zeros are stripped), in
        which case return None to signal "no active job".
        """
        result = job_address
        trimmed_address = job_address.replace('0', '')
        if trimmed_address == 'x':
            result = None
        return result

# ----------------------------------------------------------------------------------------------------------
# Broker listeners for state table change states processing
# ----------------------------------------------------------------------------------------------------------
# worker node state filter thread loop

    def worker_filter_thread_loop(self, event_filter, poll_interval):
        """
        Daemon loop: poll `event_filter` for new worker node StateChanged
        events and dispatch each to on_worker_node_state_change().

        :param event_filter: web3 event filter for StateChanged
        :param poll_interval: sleep between polls, in seconds

        If the eth node drops the filter ('filter not found'), the filter is
        recreated and polling continues (see linked web3.py issue).
        NOTE(review): when ex.args IS a tuple but the message does not match
        'filter not found', the exception is silently swallowed and the loop
        retries without logging -- confirm that is intended.
        """
        while True:
            try:
                for event in event_filter.get_new_entries(
                ):  # get_all_entries()
                    self.on_worker_node_state_change(event)
                time.sleep(poll_interval)
            except Exception as ex:
                # https://github.com/ethereum/web3.py/issues/354
                if isinstance(ex.args, tuple):
                    if len(ex.args) > 0:
                        message = ex.args[0]
                        if 'filter not found' in str(message):
                            # sometimes for unknown reason filter drops on eth node, so recreate it
                            self.logger.info('work_filter recreated')
                            self.worker_node_container.events.StateChanged.reset(
                            )
                            self.worker_node_container.events.StateChanged.uninstall(
                            )
                            event_filter = self.worker_node_container.events.StateChanged.createFilter(
                                fromBlock='latest')
                else:
                    self.logger.info('Exception on worker event handler.')
                    self.logger.info(ex.args)

    def on_worker_node_state_change(self, event: dict):
        """
        Apply a worker node StateChanged event: log the transition and push
        the new state into the worker node state machine (whose setter is
        expected to trigger the corresponding delegate actions).
        """
        worker_state_table = self.worker_node_state_machine.state_table
        state_old = event['args']['oldState']
        state_new = event['args']['newState']
        self.logger.info("Contract WorkerNode changed its state from %s to %s",
                         worker_state_table[state_old].name,
                         worker_state_table[state_new].name)
        self.worker_node_state_machine.state = state_new

    # job state filter loop
    def job_filter_thread_loop(self, event_filter, pool_interval):
        """
        Daemon loop: poll `event_filter` for cognitive job StateChanged
        events and dispatch each to on_cognitive_job_state_change().
        Mirrors worker_filter_thread_loop, including the filter-recreation
        workaround and the same silent-retry caveat.

        :param pool_interval: sleep between polls, in seconds
                              (sic: "pool" -- likely a typo for "poll")
        """
        while True:
            try:
                for event in event_filter.get_new_entries(
                ):  # get_all_entries()
                    self.on_cognitive_job_state_change(event)
                time.sleep(pool_interval)
            except Exception as ex:
                # https://github.com/ethereum/web3.py/issues/354
                if isinstance(ex.args, tuple):
                    if len(ex.args) > 0:
                        message = ex.args[0]
                        if 'filter not found' in str(message):
                            # sometimes for unknown reason filter drops on eth node, so recreate it
                            self.logger.info('job_filter recreated')
                            self.job_container.events.StateChanged.reset()
                            self.job_container.events.StateChanged.uninstall()
                            event_filter = self.job_container.events.StateChanged.createFilter(
                                fromBlock='latest')
                else:
                    self.logger.info('Exception on job event handler.')
                    self.logger.info(ex.args)

    def on_cognitive_job_state_change(self, event: dict):
        """
        Apply a cognitive job StateChanged event: log the transition and
        push the new state into the job state machine.
        """
        job_state_table = self.job_state_machine.state_table
        state_old = event['args']['oldState']
        state_new = event['args']['newState']
        self.logger.info(
            "Contract Cognitive job changed its state from %s to %s",
            job_state_table[state_old].name, job_state_table[state_new].name)
        self.job_state_machine.state = state_new

# ----------------------------------------------------------------------------------------------------------
# Worker node delegate methods
# ----------------------------------------------------------------------------------------------------------

    def create_cognitive_job(self):
        """
        WorkerNodeDelegate callback: look up the worker node's active job
        and initialize its contract/state machine unless a job is already
        tracked, the address is the empty (all-zero) address, or the job is
        already registered in `self.jobs`.
        """
        if self.job_container:
            return
        job_address = self.worker_node_container.call().activeJob()
        if self.check_job_address(job_address) is None:
            self.logger.info("Job address is empty, cant determinate job")
            return
        if job_address in self.jobs:
            return
        self.logger.info("Initializing cognitive job contract for address %s",
                         job_address)
        self.job_address = job_address
        if self.init_cognitive_job() is False:
            self.logger.error(
                "Error initializing cognitive job for address %s", job_address)

    def start_validating(self):
        """
        WorkerNodeDelegate callback: build a processor and start data
        validation (`processor.load()`).  Initialization failure is reported
        via processor_load_failure(None), which transacts reportInvalidData.
        """
        self.logger.info("Starting validating data")
        try:
            processor = self.init_processor()
        except Exception as ex:
            self.logger.error("Error during processor initialization: %s",
                              type(ex))
            self.logger.error(ex.args)
            self.processor_load_failure(None)
            return
        processor.load()

    def start_computing(self):
        """
        WorkerNodeDelegate callback: start computation on an existing
        processor, or create one first if none is registered yet.
        Initialization failure is routed to processor_computing_failure(None),
        which exits the process.
        """
        self.logger.info("Starting computing cognitive job")
        if not self.processors:  # if processors is empty init it
            try:
                processor = self.init_processor()
            except Exception as ex:
                self.logger.error("Error during processor initialization: %s",
                                  type(ex))
                self.logger.error(ex.args)
                self.processor_computing_failure(None)
                return
            # start computing after processor init
            processor.compute()
        else:
            list(self.processors.values())[0].compute()

    def state_transact(self, name: str, *result_file):
        """
        Build, sign and send the worker node contract transaction selected
        by `name`, retrying until the receipt reports success (status != 0).

        :param name: contract method name; one of alive, acceptAssignment,
                     processToDataValidation, reportInvalidData,
                     acceptValidData, processToCognition, provideResults
        :param result_file: for provideResults only -- result file address,
                            encoded to bytes before sending
        :raises CriticalTransactionError: when sending/awaiting the
                transaction raises (ends the retry loop).

        NOTE(review): dispatch uses `name in 'alive'` etc. -- substring
        containment, not equality, so e.g. name='a' matches several
        branches; `name == '...'` is almost certainly intended.
        """
        self.logger.info("Transact to worker node : " + name)
        # Private key is the second field of the "<account>_<key>" vault blob.
        private_key = self.key_tool.obtain_key(self.manager.vault_key).split(
            "_", 1)[1]
        tx_status = 0
        while tx_status == 0:
            try:
                nonce = self.worker_node_container.web3.eth.getTransactionCount(
                    self.manager.eth_worker_node_account)
                raw_transaction = None
                if name in 'alive':
                    raw_transaction = self.worker_node_container.functions.alive() \
                        .buildTransaction({
                            'from': self.manager.eth_worker_node_account,
                            'nonce': nonce})
                if name in 'acceptAssignment':
                    raw_transaction = self.worker_node_container.functions.acceptAssignment() \
                        .buildTransaction({
                            'from': self.manager.eth_worker_node_account,
                            'nonce': nonce})
                if name in 'processToDataValidation':
                    raw_transaction = self.worker_node_container.functions.processToDataValidation() \
                        .buildTransaction({
                            'from': self.manager.eth_worker_node_account,
                            'nonce': nonce})
                if name in 'reportInvalidData':
                    raw_transaction = self.worker_node_container.functions.reportInvalidData() \
                        .buildTransaction({
                            'from': self.manager.eth_worker_node_account,
                            'nonce': nonce})
                if name in 'acceptValidData':
                    raw_transaction = self.worker_node_container.functions.acceptValidData() \
                        .buildTransaction({
                            'from': self.manager.eth_worker_node_account,
                            'nonce': nonce})
                if name in 'processToCognition':
                    raw_transaction = self.worker_node_container.functions.processToCognition() \
                        .buildTransaction({
                            'from': self.manager.eth_worker_node_account,
                            'nonce': nonce})
                if name in 'provideResults':
                    raw_transaction = self.worker_node_container.functions.provideResults(str.encode(result_file[0])) \
                        .buildTransaction({
                            'from': self.manager.eth_worker_node_account,
                            'nonce': nonce})

                if raw_transaction is not None:
                    signed_transaction = self.worker_node_container.web3.eth.account.signTransaction(
                        raw_transaction, private_key)
                    tx_hash = self.worker_node_container.web3.eth.sendRawTransaction(
                        signed_transaction.rawTransaction)
                    self.logger.info('TX_HASH : ' + tx_hash.hex())
                    self.logger.info('Waiting for receipt...')
                    transaction_receipt = self.worker_node_container.web3.eth.waitForTransactionReceipt(
                        tx_hash, timeout=300)
                    self.logger.info('TX_RECEIPT : ' +
                                     str(transaction_receipt))
                    self.logger.info('TRANSACTION_STATUS = ' +
                                     str(transaction_receipt['status']))
                    # status 0 = reverted -> loop retries with a fresh nonce
                    tx_status = transaction_receipt['status']
                else:
                    self.logger.info('Unknown state transaction. Skip.')
                    tx_status = 1  # for unknown state transaction reason
            except Exception as ex:
                self.logger.error("Error executing %s transaction: %s", name,
                                  type(ex))
                self.logger.error(ex.args)
                raise CriticalTransactionError(name)
        return

# ----------------------------------------------------------------------------------------------------------
# Cognitive job delegate methods
# ----------------------------------------------------------------------------------------------------------

    def terminate_job(self, job: CognitiveJob):
        """CognitiveJob delegate hook; intentionally not implemented."""
        pass

    def transact(self, name: str, cb: Callable):
        """CognitiveJob delegate hook; intentionally not implemented."""
        pass

# ----------------------------------------------------------------------------------------------------------
# Processor delegate methods
# ----------------------------------------------------------------------------------------------------------

    def processor_load_complete(self, processor_id: str):
        """Processor delegate: data loaded OK -> confirm on-chain via acceptValidData."""
        self.logger.info('Processor loading complete.')
        self.logger.info('Confirming data validness')
        self.state_transact('acceptValidData')

    def processor_load_failure(self, processor_id: Union[str, None]):
        """Processor delegate: data load failed -> report on-chain via reportInvalidData."""
        self.logger.info('Processor loading fail.')
        self.logger.info('Reporting invalid data')
        self.state_transact('reportInvalidData')

    def processor_computing_complete(self, processor_id: str,
                                     results_file: str):
        """
        Processor delegate: computation finished -> reset completion flag on
        the manager and publish the result file address via provideResults.
        """
        self.logger.info('Processor computing complete.')
        self.logger.info('Providing results')
        self.logger.info('Result file address : ' + results_file)
        self.manager.set_complete_reset()
        self.state_transact('provideResults', results_file)

    def processor_computing_failure(self, processor_id: Union[str, None]):
        """
        Processor delegate: unrecoverable computing failure -> exit the whole
        process so an external supervisor can reboot it and retry the work.
        """
        self.logger.critical(
            "Can't complete computing, exiting in order to reboot and try to repeat the work"
        )
        sys.exit(1)
Code example #2
0
class WorkerNodeStateMachineThread:
    """
    Wraps a WorkerNode state machine together with a daemon thread that
    polls the contract's StateChanged event filter and forwards state
    transitions to a WorkerNodeStateDelegate.  The poll interval adapts to
    the observed block production rate.
    """

    def __init__(self, contract_container, delegate, address, contract,
                 state_delegate: WorkerNodeStateDelegate):
        """
        Create the WorkerNode state machine, install a StateChanged filter
        from the latest block and immediately start the polling thread.

        :param contract_container: web3 contract container for the worker node
        :param delegate: delegate passed through to WorkerNode
        :param address: worker node contract address
        :param contract: worker node contract ABI/descriptor
        :param state_delegate: receives on_worker_node_state_change(event)
        """
        # Initializing logger object
        self.logger = logging.getLogger("WorkerNodeStateMachineThread")
        self.logger.setLevel(logging.INFO)
        self.logger.addHandler(LogSocketHandler.get_instance())

        # init worker node
        self.worker_node = WorkerNode(contract_container=contract_container,
                                      delegate=delegate,
                                      address=address,
                                      contract=contract)

        # int filter and filter thread
        self.state_delegate = state_delegate
        self.worker_node_container = contract_container
        # Latest block number seen; set by calculate_thread_sleep_interval
        # and used as the anchor when recreating the event filter.
        self.current_block_number = None

        self.filter_on_worker = contract_container.events.StateChanged.createFilter(
            fromBlock='latest')
        self.worker_node_event_thread = Thread(
            target=self.worker_filter_thread_loop, daemon=True)
        # is_alive() is always False for a brand-new Thread, so this guard
        # is effectively a no-op; start() runs unconditionally here.
        if not self.worker_node_event_thread.is_alive():
            self.worker_node_event_thread.start()

    # -------------------------------------
    # thread methods
    # -------------------------------------
    def get_worker_node_event_thread(self):
        """Return the polling Thread object (e.g. so callers can join it)."""
        return self.worker_node_event_thread

    def alive(self):
        """Return True while the polling thread is still running."""
        return self.worker_node_event_thread.is_alive()

    # -------------------------------------
    # state methods
    # -------------------------------------
    def process_state(self):
        """Delegate to WorkerNode.process_state() and return its result."""
        return self.worker_node.process_state()

    def state_table(self):
        """Return the WorkerNode state table (method, not a property)."""
        return self.worker_node.state_table

    def state(self, new_state):
        """Setter-style method: assign `new_state` to the worker node state."""
        self.worker_node.state = new_state

    # -------------------------------------
    # thread methods
    # -------------------------------------
    def worker_filter_thread_loop(self):
        """
        Poll the StateChanged filter forever, forwarding events whose
        newState differs from the current WorkerNode state to the delegate.

        The filter is recreated (from current_block_number - 2, to re-scan a
        small overlap) in three cases: every 120 s of filter age, when the
        filter object is missing, and on any exception from the poll.  After
        each recreation the state is reset to 0 and re-derived via
        process_state().

        NOTE(review): last_call_time starts at 0, so the very first poll
        always takes the "recreated on object timeout" path -- confirm that
        is intended and that current_block_number is set by then (it is,
        since calculate_thread_sleep_interval runs first each iteration).
        """
        past_block = self.worker_node_container.web3.eth.getBlock('latest')
        past_block_number = past_block.number
        last_call_time = 0
        while True:
            poll_interval, past_block, past_block_number = \
                self.calculate_thread_sleep_interval(past_block=past_block, past_block_number=past_block_number)
            try:
                if self.filter_on_worker:
                    if last_call_time + 120 < time.time(
                    ):  # recreate filter object
                        self.logger.info(
                            'work_filter recreated on object timeout')
                        self.filter_on_worker = self.worker_node_container.events.StateChanged.createFilter(
                            fromBlock=self.current_block_number - 2)
                        # TODO make state reset to 0
                        self.state(0)
                        self.process_state()
                    events = self.filter_on_worker.get_new_entries()
                    last_call_time = time.time()
                    for event in events:
                        # validate current state and worker node address
                        current_state = self.worker_node.state
                        new_state = event['args']['newState']
                        if current_state != new_state:
                            self.state_delegate.on_worker_node_state_change(
                                event)
                else:
                    self.logger.info('work_filter recreated on object null')
                    self.filter_on_worker = self.worker_node_container.events.StateChanged.createFilter(
                        fromBlock=self.current_block_number - 2)
                    # TODO make state reset to 0
                    self.state(0)
                    self.process_state()
            except Exception:
                # Any poll failure (dropped filter, transport error) is
                # handled the same way: rebuild the filter and resync state.
                self.logger.info('work_filter recreated due timeout')
                self.filter_on_worker = self.worker_node_container.events.StateChanged.createFilter(
                    fromBlock=self.current_block_number - 2)
                # TODO make state reset to 0
                self.state(0)
                self.process_state()
            time.sleep(poll_interval)

    def calculate_thread_sleep_interval(self, past_block, past_block_number):
        """
        Derive the next poll sleep from observed block timing.

        Also updates `self.current_block_number` as a side effect.

        :param past_block: block object from the previous measurement
        :param past_block_number: its number
        :return: (poll_interval_seconds, new_past_block, new_past_block_number);
                 falls back to a fixed 5 s when no new block has appeared.

        NOTE(review): poll_interval = avg_block_time - 0.5 can be negative
        for fast chains (avg block time < 0.5 s per block) -- time.sleep
        would then raise ValueError; consider clamping at 0.
        """
        # calculate sleep thread time
        current_block = self.worker_node_container.web3.eth.getBlock('latest')
        self.current_block_number = current_block.number
        diff = self.current_block_number - past_block_number
        if diff >= 1:
            # Average seconds per block over the blocks seen since last call,
            # minus a small head start so the next poll lands just before
            # the expected next block.
            dynamic_poll_interval = current_block.timestamp - past_block.timestamp
            poll_interval = dynamic_poll_interval / diff - 0.5
            self.logger.info('POLL_INTERVAL : ' + str(dynamic_poll_interval) +
                             ' sleep_time : ' + str(poll_interval) +
                             ' block_number : ' +
                             str(self.current_block_number))
            past_block = self.worker_node_container.web3.eth.getBlock('latest')
            past_block_number = past_block.number
            return poll_interval, past_block, past_block_number
        else:
            return 5, past_block, past_block_number