Ejemplo n.º 1
0
class TeosDaemon:
    """
    The :class:`TeosDaemon` organizes the code to initialize all the components of teos, start the service, stop and
    teardown.

    Args:
        config (:obj:`dict`): the configuration object.
        sk (:obj:`PrivateKey`): the :obj:`PrivateKey` of the tower.
        logger (:obj:`Logger <teos.logger.Logger>`): the logger instance.
        logging_port (:obj:`int`): the port where the logging server can be reached (localhost:logging_port)
        stop_log_event (:obj:`multiprocessing.Event`): the event to signal a stop to the logging server
        logging_process (:obj:`multiprocessing.Process`): the logging server process

    Attributes:
        stop_command_event (:obj:`threading.Event`): The event that will be set to initiate a graceful shutdown.
        stop_event (:obj:`multiprocessing.Event`): The event that services running on different processes will monitor
            in order to be informed that they should shutdown.
        block_processor (:obj:`teos.block_processor.BlockProcessor`): The block processor instance.
        db_manager (:obj:`teos.appointments_dbm.AppointmentsDBM`): The db manager for appointments.
        watcher (:obj:`teos.watcher.Watcher`): The watcher instance.
        watcher_thread (:obj:`threading.Thread`): After ``bootstrap_components``, the thread that
            runs the Watcher monitoring (set to :obj:`None` beforehand).
        responder_thread (:obj:`threading.Thread`): After ``bootstrap_components``, the thread that
            runs the Responder monitoring (set to :obj:`None` beforehand).
        chain_monitor (:obj:`teos.chain_monitor.ChainMonitor`): The ``ChainMonitor`` instance.
        internal_api_endpoint (:obj:`str`): The full host name and port of the internal api.
        internal_api (:obj:`teos.internal_api.InternalAPI`): The InternalAPI instance.
        api_proc (:obj:`subprocess.Popen` or :obj:`multiprocessing.Process`): Once ``start_services`` has run,
            the instance of either ``Popen`` or ``Process`` that is serving the public API (set to
            :obj:`None` beforehand).
        rpc_process (:obj:`multiprocessing.Process`): The process serving the RPC interface. It is created by the
            constructor, but it is not started until ``start_services`` is called.

    Raises:
        :obj:`RuntimeError`: if bitcoind cannot be reached, or if it is running on a network different from the
        one set in ``BTC_NETWORK``.
    """

    def __init__(self, config, sk, logger, logging_port, stop_log_event, logging_process):
        self.config = config
        self.logger = logger
        self.logging_port = logging_port
        self.stop_log_event = stop_log_event
        self.logging_process = logging_process

        # event triggered when a ``stop`` command is issued
        # Using multiprocessing.Event seems to cause a deadlock if event.set() is called in a signal handler that
        # interrupted event.wait(). This does not happen with threading.Event.
        # See https://bugs.python.org/issue41606
        self.stop_command_event = threading.Event()

        # event triggered when the public API is halted, hence teosd is ready to stop
        self.stop_event = multiprocessing.Event()

        # Split the bitcoind related settings by prefix: one set for the RPC connection, one for the block feed
        bitcoind_connect_params = {k: v for k, v in config.items() if k.startswith("BTC_RPC")}
        bitcoind_feed_params = {k: v for k, v in config.items() if k.startswith("BTC_FEED")}

        # Fail early: nothing is built unless bitcoind is reachable and on the expected network
        if not can_connect_to_bitcoind(bitcoind_connect_params):
            raise RuntimeError("Cannot connect to bitcoind")
        if not in_correct_network(bitcoind_connect_params, config.get("BTC_NETWORK")):
            raise RuntimeError("bitcoind is running on a different network, check teos.conf and bitcoin.conf")

        # Shared with the components that talk to bitcoind. Both checks passed, so it starts set.
        bitcoind_reachable = threading.Event()
        bitcoind_reachable.set()

        self.logger.info(f"tower_id = {Cryptographer.get_compressed_pk(sk.public_key)}")
        self.block_processor = BlockProcessor(bitcoind_connect_params, bitcoind_reachable)
        carrier = Carrier(bitcoind_connect_params, bitcoind_reachable)

        gatekeeper = Gatekeeper(
            UsersDBM(self.config.get("USERS_DB_PATH")),
            self.block_processor,
            self.config.get("SUBSCRIPTION_SLOTS"),
            self.config.get("SUBSCRIPTION_DURATION"),
            self.config.get("EXPIRY_DELTA"),
        )
        self.db_manager = AppointmentsDBM(self.config.get("APPOINTMENTS_DB_PATH"))
        responder = Responder(self.db_manager, gatekeeper, carrier, self.block_processor)
        self.watcher = Watcher(
            self.db_manager,
            gatekeeper,
            self.block_processor,
            responder,
            sk,
            self.config.get("MAX_APPOINTMENTS"),
            self.config.get("LOCATOR_CACHE_SIZE"),
        )

        # Both are set by ``bootstrap_components``
        self.watcher_thread = None
        self.responder_thread = None

        # Create the chain monitor
        self.chain_monitor = ChainMonitor(
            [self.watcher.block_queue, responder.block_queue, gatekeeper.block_queue],
            self.block_processor,
            bitcoind_feed_params,
        )

        # Set up the internal API
        self.internal_api_endpoint = f'{self.config.get("INTERNAL_API_HOST")}:{self.config.get("INTERNAL_API_PORT")}'
        self.internal_api = InternalAPI(
            self.watcher,
            self.internal_api_endpoint,
            self.config.get("INTERNAL_API_WORKERS"),
            self.stop_command_event,
        )

        # Create the rpc, without starting it
        self.rpc_process = multiprocessing.Process(
            target=rpc.serve,
            args=(
                self.config.get("RPC_BIND"),
                self.config.get("RPC_PORT"),
                self.internal_api_endpoint,
                self.logging_port,
                self.stop_event,
            ),
            daemon=True,
        )

        # This variable will contain the handle of the process running the API, when the service is started.
        # It will be an instance of either Popen or Process, depending on the WSGI config setting.
        self.api_proc = None

    def bootstrap_components(self):
        """
        Performs the initial setup of the components. It loads the appointments and tracker for the watcher and the
        responder (if any), and awakes the components. It also populates the block queues with any missing data, in
        case the tower has been offline for some time. Finally, it activates the chain monitor.
        """

        # Make sure that the ChainMonitor starts listening to new blocks while we bootstrap
        self.chain_monitor.monitor_chain()

        watcher_appointments_data = self.db_manager.load_watcher_appointments()
        responder_trackers_data = self.db_manager.load_responder_trackers()
        restoring = bool(watcher_appointments_data or responder_trackers_data)

        if restoring:
            self.logger.info("Bootstrapping from backed up data")

            # Update the Watcher backed up data if found.
            if watcher_appointments_data:
                self.watcher.appointments, self.watcher.locator_uuid_map = Builder.build_appointments(
                    watcher_appointments_data
                )

            # Update the Responder with backed up data if found.
            if responder_trackers_data:
                self.watcher.responder.trackers, self.watcher.responder.tx_tracker_map = Builder.build_trackers(
                    responder_trackers_data
                )
        else:
            self.logger.info("Fresh bootstrap")

        # Awaking components (after any backed up data has been restored) so the states can be updated.
        self.watcher_thread = self.watcher.awake()
        self.responder_thread = self.watcher.responder.awake()

        if restoring:
            self._catch_up_missed_blocks()

        # Activate ChainMonitor
        self.chain_monitor.activate()

    def _catch_up_missed_blocks(self):
        """Populates the Watcher and Responder block queues with the blocks they missed while offline."""

        last_block_watcher = self.db_manager.load_last_block_hash_watcher()
        last_block_responder = self.db_manager.load_last_block_hash_responder()

        # FIXME: 32-reorgs-offline dropped txs are not used at this point.
        last_common_ancestor_watcher, _ = self.block_processor.find_last_common_ancestor(last_block_watcher)
        missed_blocks_watcher = self.block_processor.get_missed_blocks(last_common_ancestor_watcher)

        # If the last known blocks of both match we don't perform the search twice.
        if last_block_watcher == last_block_responder:
            missed_blocks_responder = missed_blocks_watcher
        else:
            last_common_ancestor_responder, _ = self.block_processor.find_last_common_ancestor(last_block_responder)
            missed_blocks_responder = self.block_processor.get_missed_blocks(last_common_ancestor_responder)

        watcher_queue = self.watcher.block_queue
        responder_queue = self.watcher.responder.block_queue

        if missed_blocks_watcher and missed_blocks_responder:
            # Both need to be updated at the same time, block by block
            Builder.update_states(watcher_queue, responder_queue, missed_blocks_watcher, missed_blocks_responder)

        # If only one of the instances needs to be updated, it can be done separately.
        elif missed_blocks_responder:
            Builder.populate_block_queue(responder_queue, missed_blocks_responder)
            responder_queue.join()

        elif missed_blocks_watcher:
            Builder.populate_block_queue(watcher_queue, missed_blocks_watcher)
            watcher_queue.join()

    def start_services(self, logging_port):
        """
        Readies the tower by setting up signal handling, and starting all the services.

        Args:
            logging_port (:obj:`int`): the port where the logging server can be reached (localhost:logging_port)
        """

        for signum in (SIGINT, SIGTERM, SIGQUIT):
            signal(signum, self.handle_signals)

        # Start the rpc process
        self.rpc_process.start()

        # Start the internal API
        # This MUST be done after rpc_process.start to avoid the issue that was solved in
        # https://github.com/talaia-labs/python-teos/pull/198
        self.internal_api.rpc_server.start()
        self.logger.info(f"Internal API initialized. Serving at {self.internal_api_endpoint}")

        # Start the public API server
        api_endpoint = f"{self.config.get('API_BIND')}:{self.config.get('API_PORT')}"
        if self.config.get("WSGI") == "gunicorn":
            # FIXME: We may like to add workers depending on a config value
            teos_folder = os.path.dirname(os.path.realpath(__file__))
            self.api_proc = subprocess.Popen(
                [
                    "gunicorn",
                    f"--config={os.path.join(teos_folder, 'gunicorn_config.py')}",
                    f"--bind={api_endpoint}",
                    f"teos.api:serve(internal_api_endpoint='{self.internal_api_endpoint}', "
                    f"endpoint='{api_endpoint}', logging_port='{logging_port}', "
                    f"min_to_self_delay='{self.config.get('MIN_TO_SELF_DELAY')}')",
                ],
                # Inherit the current environment and add the logging server port on top
                env={**os.environ, "LOG_SERVER_PORT": str(logging_port)},
            )
        else:
            self.api_proc = multiprocessing.Process(
                target=api.serve,
                kwargs={
                    "internal_api_endpoint": self.internal_api_endpoint,
                    "endpoint": api_endpoint,
                    "logging_port": logging_port,
                    "min_to_self_delay": self.config.get("MIN_TO_SELF_DELAY"),
                    "auto_run": True,
                },
            )
            self.api_proc.start()

    def handle_signals(self, signum, frame):
        """
        Handles signals by initiating a graceful shutdown.

        Args:
            signum (:obj:`int`): the number of the received signal (only used for logging).
            frame: the interrupted stack frame (unused).
        """
        self.logger.debug(f"Signal {signum} received. Stopping")

        self.stop_command_event.set()

    def teardown(self):
        """
        Shuts down all services and closes the DB, then exits. This method does not return.

        Raises:
            :obj:`SystemExit`: always, with exit status 0, once everything has been shut down.
        """
        self.logger.info("Terminating public API")

        # Stop the public API first (api_proc is None if the public API was never started)
        if isinstance(self.api_proc, subprocess.Popen):
            self.api_proc.terminate()
            self.api_proc.wait()
        elif isinstance(self.api_proc, multiprocessing.Process):
            # FIXME: using SIGKILL for now, adapt it to use SIGTERM so the shutdown can be graceful
            self.api_proc.kill()
            self.api_proc.join()

        self.logger.info("Public API terminated")

        # Signals readiness to shutdown to the other processes
        self.stop_event.set()

        # wait for RPC process to shutdown
        self.rpc_process.join()

        # Stops the internal API, after waiting for some grace time
        self.logger.info("Stopping internal API")
        self.internal_api.rpc_server.stop(SHUTDOWN_GRACE_TIME).wait()
        self.logger.info("Internal API stopped")

        # terminate the ChainMonitor
        self.chain_monitor.terminate()

        # wait for watcher and responder to finish processing their queues.
        # Both handles are None until ``bootstrap_components`` has run, so there is nothing to wait for in that case.
        for worker in (self.watcher_thread, self.responder_thread):
            if worker is not None:
                worker.join()

        self.logger.info("Closing connection with appointments db")
        self.db_manager.close()
        self.logger.info("Closing connection with users db")
        self.watcher.gatekeeper.user_db.close()

        self.logger.info("Shutting down TEOS")
        self.stop_log_event.set()
        self.logging_process.join()

        # The ``exit`` builtin is injected by the ``site`` module for interactive use and is not guaranteed to
        # exist (e.g. ``python -S``); raising SystemExit directly is the equivalent that always works.
        raise SystemExit(0)

    def start(self):
        """This method implements the whole lifetime cycle of the TEOS tower. This method does not return."""
        self.logger.info("Starting TEOS")
        self.bootstrap_components()
        self.start_services(self.logging_port)

        # Block until a shutdown is requested (stop command or signal), then tear everything down
        self.stop_command_event.wait()
        self.teardown()
Ejemplo n.º 2
0
def main(command_line_conf):
    """
    Boots the tower: loads the configuration, checks the connection with bitcoind, builds all the components,
    restores any backed up state and, finally, starts the chain monitor and the public API.

    If bitcoind cannot be reached, or it is running on a different network, the error is logged and the function
    returns without starting anything. Any exception raised along the way is logged and the process exits with
    status 1.

    Args:
        command_line_conf (:obj:`dict`): the configuration options passed via command line. If it contains
            ``DATA_DIR``, that entry is removed from it and used as data directory instead of the default one.
    """
    global db_manager, chain_monitor

    try:
        for signum in (SIGINT, SIGTERM, SIGQUIT):
            signal(signum, handle_signals)

        # Loads config and sets up the data folder and log file
        if "DATA_DIR" in command_line_conf:
            data_dir = command_line_conf.pop("DATA_DIR")
        else:
            data_dir = DATA_DIR

        loader = ConfigLoader(data_dir, CONF_FILE_NAME, DEFAULT_CONF, command_line_conf)
        config = loader.build_config()

        # Set default RPC port if not overwritten by the user.
        if "BTC_RPC_PORT" not in loader.overwritten_fields:
            config["BTC_RPC_PORT"] = get_default_rpc_port(config.get("BTC_NETWORK"))

        setup_data_folder(data_dir)
        setup_logging(config.get("LOG_FILE"), LOG_PREFIX)

        logger.info("Starting TEOS")

        # Settings are selected by key prefix
        bitcoind_connect_params = {key: value for key, value in config.items() if key.startswith("BTC")}
        bitcoind_feed_params = {key: value for key, value in config.items() if key.startswith("BTC_FEED")}

        # Nothing is built unless bitcoind is reachable and on the expected network
        if not can_connect_to_bitcoind(bitcoind_connect_params):
            logger.error("Cannot connect to bitcoind. Shutting down")
            return

        if not in_correct_network(bitcoind_connect_params, config.get("BTC_NETWORK")):
            logger.error("bitcoind is running on a different network, check conf.py and bitcoin.conf. Shutting down")
            return

        secret_key_der = Cryptographer.load_key_file(config.get("TEOS_SECRET_KEY"))
        if not secret_key_der:
            raise IOError("TEOS private key cannot be loaded")

        tower_sk = Cryptographer.load_private_key_der(secret_key_der)
        logger.info("tower_id = {}".format(Cryptographer.get_compressed_pk(tower_sk.public_key)))

        block_processor = BlockProcessor(bitcoind_connect_params)
        carrier = Carrier(bitcoind_connect_params)

        users_db = UsersDBM(config.get("USERS_DB_PATH"))
        gatekeeper = Gatekeeper(
            users_db,
            block_processor,
            config.get("SUBSCRIPTION_SLOTS"),
            config.get("SUBSCRIPTION_DURATION"),
            config.get("EXPIRY_DELTA"),
        )
        db_manager = AppointmentsDBM(config.get("APPOINTMENTS_DB_PATH"))
        responder = Responder(db_manager, gatekeeper, carrier, block_processor)
        watcher = Watcher(
            db_manager,
            gatekeeper,
            block_processor,
            responder,
            secret_key_der,
            config.get("MAX_APPOINTMENTS"),
            config.get("LOCATOR_CACHE_SIZE"),
        )

        # Create the chain monitor (it is started once the bootstrap is done)
        chain_monitor = ChainMonitor(
            watcher.block_queue, watcher.responder.block_queue, block_processor, bitcoind_feed_params
        )

        stored_appointments = db_manager.load_watcher_appointments()
        stored_trackers = db_manager.load_responder_trackers()
        restoring = len(stored_appointments) != 0 or len(stored_trackers) != 0

        if restoring:
            logger.info("Bootstrapping from backed up data")

            # Update the Watcher backed up data if found.
            if len(stored_appointments) != 0:
                watcher.appointments, watcher.locator_uuid_map = Builder.build_appointments(stored_appointments)

            # Update the Responder with backed up data if found.
            if len(stored_trackers) != 0:
                watcher.responder.trackers, watcher.responder.tx_tracker_map = Builder.build_trackers(
                    stored_trackers
                )
        else:
            logger.info("Fresh bootstrap")

        # Awaking components (after any backed up data has been restored) so the states can be updated.
        watcher.awake()
        watcher.responder.awake()

        if restoring:
            last_block_watcher = db_manager.load_last_block_hash_watcher()
            last_block_responder = db_manager.load_last_block_hash_responder()

            # Populate the block queues with data if they've missed some while offline. If the blocks of both match
            # we don't perform the search twice.

            # FIXME: 32-reorgs-offline dropped txs are not used at this point.
            ancestor_watcher, _ = block_processor.find_last_common_ancestor(last_block_watcher)
            missed_blocks_watcher = block_processor.get_missed_blocks(ancestor_watcher)

            if last_block_watcher == last_block_responder:
                missed_blocks_responder = missed_blocks_watcher
            else:
                ancestor_responder, _ = block_processor.find_last_common_ancestor(last_block_responder)
                missed_blocks_responder = block_processor.get_missed_blocks(ancestor_responder)

            watcher_behind = len(missed_blocks_watcher) != 0
            responder_behind = len(missed_blocks_responder) != 0

            if watcher_behind and responder_behind:
                # Both need to be updated at the same time, block by block
                Builder.update_states(watcher, missed_blocks_watcher, missed_blocks_responder)

            # If only one of the instances needs to be updated, it can be done separately.
            elif responder_behind:
                Builder.populate_block_queue(watcher.responder.block_queue, missed_blocks_responder)
                watcher.responder.block_queue.join()

            elif watcher_behind:
                Builder.populate_block_queue(watcher.block_queue, missed_blocks_watcher)
                watcher.block_queue.join()

        # Fire the API and the ChainMonitor
        # FIXME: 92-block-data-during-bootstrap-db
        chain_monitor.monitor_chain()
        inspector = Inspector(block_processor, config.get("MIN_TO_SELF_DELAY"))
        public_api = API(config.get("API_BIND"), config.get("API_PORT"), inspector, watcher)
        public_api.start()
    except Exception as e:
        logger.error("An error occurred: {}. Shutting down".format(e))
        exit(1)