Ejemplo n.º 1
0
def main() -> None:
    """Pass this node's IP and the expected master count to `set_num_replicas`.

    The internal IP address comes from `utils.detect_ip()` and the master
    count from `get_expected_master_node_count()`; both are logged before
    `set_num_replicas` is called.
    """
    # Figure out which internal IP address this node is using.
    node_ip = utils.detect_ip()
    ip_message = 'My internal IP address is `{}`'.format(node_ip)
    log.info(ip_message)

    expected_masters = get_expected_master_node_count()
    log.info('Expected number of DC/OS master nodes: %s', expected_masters)

    set_num_replicas(node_ip, expected_masters)
Ejemplo n.º 2
0
def main() -> None:
    """Apply the replica count and then the cluster version setting.

    Looks up this node's internal IP address and the expected number of
    DC/OS master nodes, logs both, calls `set_num_replicas` with them, and
    finally calls `set_cluster_version` with the version string '2.0'.
    """
    # Figure out which internal IP address this node is using.
    node_ip = utils.detect_ip()
    ip_message = 'My internal IP address is `{}`'.format(node_ip)
    log.info(ip_message)

    expected_masters = get_expected_master_node_count()
    log.info('Expected number of DC/OS master nodes: %s', expected_masters)

    set_num_replicas(node_ip, expected_masters)

    # The deployed CockroachDB is a v2.0.x release, hence '2.0'.
    cluster_version = '2.0'
    set_cluster_version(node_ip, cluster_version)
def main() -> None:
    """Set the replica count, then the cluster version, for this node.

    Steps, in order:

    1. Detect and log the node's internal IP address.
    2. Fetch and log the expected number of DC/OS master nodes.
    3. Call `set_num_replicas` with the IP and the master count.
    4. Call `set_cluster_version` with the IP and '2.0'.
    """
    local_address = utils.detect_ip()
    log.info('My internal IP address is `{}`'.format(local_address))

    num_masters = get_expected_master_node_count()
    log.info('Expected number of DC/OS master nodes: %s', num_masters)

    set_num_replicas(local_address, num_masters)

    # '2.0' matches the CockroachDB v2.0.x release that is being run.
    set_cluster_version(local_address, '2.0')
Ejemplo n.º 4
0
def main() -> None:
    """Dump the database to a backup file or to standard output.

    The destination comes from the parsed command-line arguments. When
    `backup_file_path` is set, the dump is written to that file (opened in
    binary mode); otherwise `sys.stdout` is passed to `dump_database`.
    """
    # Figure out which internal IP address this node is using.
    node_ip = utils.detect_ip()
    log.info('My internal IP address is `{}`'.format(node_ip))

    target_path = _parse_args().backup_file_path

    # No path given: announce and stream the dump to standard output.
    if not target_path:
        log.info('Write backup to: STDOUT')
        dump_database(my_internal_ip=node_ip, out=sys.stdout)
        return

    log.info('Write backup to: {}'.format(target_path))
    with open(target_path, 'wb') as backup_file:
        dump_database(my_internal_ip=node_ip, out=backup_file)
Ejemplo n.º 5
0
def main() -> None:
    """Restore the IAM database from the backup file named in the arguments.

    Calls `recover_database` with this node's internal IP, the backup file
    path, and a timestamp-based database suffix. If that call raises
    `subprocess.CalledProcessError`, an error is logged and the process
    exits with status 1.
    """
    # Figure out which internal IP address this node is using.
    node_ip = utils.detect_ip()
    log.info('My internal IP address is `{}`'.format(node_ip))

    cli_args = _parse_args()
    log.info('Backup filepath: {}'.format(cli_args.backup_file_path))

    log.info('Begin IAM database restore procedure.')
    try:
        # The suffix carries microseconds so that two runs of this script
        # within the same second still get distinct suffixes.
        # See https://jira.mesosphere.com/browse/DCOS-42407
        suffix = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
        recover_database(
            my_internal_ip=node_ip,
            backup_file_path=cli_args.backup_file_path,
            db_suffix=suffix,
        )
    except subprocess.CalledProcessError:
        log.error("Failed to restore IAM database.")
        sys.exit(1)
    else:
        log.info('IAM database restored successfully.')
Ejemplo n.º 6
0
def main() -> None:
    """Register this node with the CockroachDB cluster tracked in ZooKeeper.

    Flow:

    1. Detect the local IP and connect to ZooKeeper using the credentials in
       the `DATASTORE_ZK_USER` / `DATASTORE_ZK_SECRET` environment variables.
    2. Make sure the `/cockroach`, `/cockroach/nodes` and
       `/cockroach/locking` ZNodes exist.
    3. If nodes are already registered, register this IP (under the ZK lock)
       when it is missing, write the node list to `NODES_FILE_PATH`, and
       return.
    4. Otherwise take the ZK lock, re-check the registered nodes, and either
       join the cluster that appeared in the meantime or initialize a new
       one. In both cases the IP is registered and the node list is written
       to `NODES_FILE_PATH` while the lock is held.
    """
    logging.basicConfig(format='[%(levelname)s] %(message)s', level='INFO')

    # Work out the internal IP of this host.
    local_ip = utils.detect_ip()
    log.info("My IP is `{}`".format(local_ip))

    # Establish the ZooKeeper session.
    log.info("Connecting to ZooKeeper.")
    zk = zk_connect(
        zk_user=os.environ.get('DATASTORE_ZK_USER'),
        zk_secret=os.environ.get('DATASTORE_ZK_SECRET'),
    )

    # Create the required ZNodes if they are missing.
    for znode in ("/cockroach", "/cockroach/nodes", "/cockroach/locking"):
        zk.ensure_path(znode)

    # First, unlocked look at the registered nodes. This is best-effort only
    # because the lock is not held here.
    # NOTE(review): the original comment says `zk.sync()` is called so that
    # the latest value is read; that call is not visible in this function and
    # presumably lives in `_get_registered_nodes` -- confirm.
    members = _get_registered_nodes(zk=zk, zk_path=ZK_NODES_PATH)
    if members:
        # A cluster exists already: make sure we are listed, persist the
        # node IPs to `NODES_FILE_PATH`, and finish.
        log.info("Cluster has members registered already: {}".format(members))
        if local_ip not in members:
            log.info(
                "IP not found in list of nodes. Registering cluster membership."
            )
            with _zk_lock(
                zk=zk,
                lock_path=ZK_LOCK_PATH,
                contender_id=LOCK_CONTENDER_ID,
                timeout=ZK_LOCK_TIMEOUT,
            ):
                members = _register_cluster_membership(
                    zk=zk,
                    zk_path=ZK_NODES_PATH,
                    ip=local_ip,
                )
        _dump_nodes_to_file(members, NODES_FILE_PATH)
        log.info("Registration complete. ")
        return

    # Nothing is registered in ZK yet, so we assume the cluster has to be
    # bootstrapped. The lock is held across the whole startup procedure and
    # the registration of our IP: only the first instance may start without
    # the --join parameter (which bootstraps the cluster), and the lock keeps
    # several instances from doing so simultaneously.
    #
    # Failing to acquire the lock means a peer is bootstrapping already. We
    # are expected to crash, be restarted by systemd, and then find the
    # cluster bootstrapped, taking the branch above so that the cluster
    # eventually converges.
    with _zk_lock(
        zk=zk,
        lock_path=ZK_LOCK_PATH,
        contender_id=LOCK_CONTENDER_ID,
        timeout=ZK_LOCK_TIMEOUT,
    ):
        # Re-read the node list now that we hold the lock, in case a peer
        # bootstrapped the cluster after our first read.
        log.info("Checking for registered nodes while holding lock.")
        members = _get_registered_nodes(zk=zk, zk_path=ZK_NODES_PATH)
        must_bootstrap = not members

        if must_bootstrap:
            log.info("Cluster has not been initialized yet.")
            # Still no cluster: start cockroachdb without any IPs to join,
            # which makes it bootstrap the cluster.
            _init_cockroachdb_cluster(ip=local_ip)
        else:
            # A peer bootstrapped the cluster in the meantime; we only join.
            log.info("Cluster has already been initialized: {}".format(members))

        # Registration happens only after any initialization succeeded. If
        # registration fails after a bootstrap, later runs will not notice
        # that a cluster was initialized because our IP is absent from ZK.
        members = _register_cluster_membership(
            zk=zk,
            zk_path=ZK_NODES_PATH,
            ip=local_ip,
        )
        _dump_nodes_to_file(members, NODES_FILE_PATH)

        if must_bootstrap:
            log.info("Successfully initialized cluster.")
Ejemplo n.º 7
0
def main() -> None:
    """Join or bootstrap the CockroachDB cluster coordinated through ZooKeeper.

    After connecting to ZooKeeper and ensuring the `/cockroach` ZNodes exist,
    the function reads the registered nodes. If any exist, this node's IP is
    registered when missing and the node list is written to
    `NODES_FILE_PATH`. If none exist, the ZK lock is taken, the list is read
    again, and the node either joins the cluster that has appeared or
    initializes a new one before registering itself.
    """
    logging.basicConfig(format='[%(levelname)s] %(message)s', level='INFO')

    def acquire_lock():
        # Build the ZK lock context manager shared by both code paths.
        return _zk_lock(
            zk=zk,
            lock_path=ZK_LOCK_PATH,
            contender_id=LOCK_CONTENDER_ID,
            timeout=ZK_LOCK_TIMEOUT,
        )

    def register_self():
        # Add our IP under `ZK_NODES_PATH` and hand back the helper's result,
        # which callers write out as the node list.
        return _register_cluster_membership(
            zk=zk,
            zk_path=ZK_NODES_PATH,
            ip=host_ip,
        )

    def read_registered():
        return _get_registered_nodes(zk=zk, zk_path=ZK_NODES_PATH)

    # Find out our internal IP.
    host_ip = utils.detect_ip()
    log.info("My IP is `{}`".format(host_ip))

    # Open the ZooKeeper connection.
    log.info("Connecting to ZooKeeper.")
    user = os.environ.get('DATASTORE_ZK_USER')
    secret = os.environ.get('DATASTORE_ZK_SECRET')
    zk = zk_connect(zk_user=user, zk_secret=secret)

    # The ZNodes used below must exist.
    zk.ensure_path("/cockroach")
    zk.ensure_path("/cockroach/nodes")
    zk.ensure_path("/cockroach/locking")

    # Best-effort check (no lock held) of whether the cluster has been
    # bootstrapped: does `ZK_NODES_PATH` have any registered nodes?
    # NOTE(review): the original comment mentions a `zk.sync()` call that
    # ensures a fresh read; it is not visible here -- presumably done inside
    # `_get_registered_nodes`, confirm.
    known_nodes = read_registered()
    if known_nodes:
        # Already initialized. Write the node IPs to `NODES_FILE_PATH` and
        # exit, registering ourselves first if we are not listed.
        log.info("Cluster has members registered already: {}".format(known_nodes))
        if host_ip not in known_nodes:
            log.info("IP not found in list of nodes. Registering cluster membership.")
            with acquire_lock():
                known_nodes = register_self()
        _dump_nodes_to_file(known_nodes, NODES_FILE_PATH)
        log.info("Registration complete. ")
        return

    # No cockroachdb node is registered with ZK, so we assume a bootstrap is
    # needed. The lock is taken and held until the cluster is bootstrapped
    # and our IP is registered.
    #
    # Holding the lock around the entire startup matters because only the
    # first instance should start without --join (thereby bootstrapping the
    # cluster); the lock stops multiple instances from doing that at once.
    #
    # If the lock cannot be acquired, a peer is bootstrapping. We should
    # crash; after systemd restarts us we expect to find the cluster
    # bootstrapped and to take the already-initialized path above, which
    # leads to an eventually converged cluster.
    with acquire_lock():
        # Confirm nobody bootstrapped the cluster since our first read.
        log.info("Checking for registered nodes while holding lock.")
        known_nodes = read_registered()

        if known_nodes:
            # Bootstrapped in the meantime: join the existing cluster and
            # write out the node IPs.
            log.info("Cluster has already been initialized: {}".format(known_nodes))
            known_nodes = register_self()
            _dump_nodes_to_file(known_nodes, NODES_FILE_PATH)
            return

        log.info("Cluster has not been initialized yet.")
        # Start cockroachdb with no cluster IPs to join, which causes it to
        # bootstrap the cluster.
        _init_cockroachdb_cluster(ip=host_ip)
        # Only after the cluster is initialized do we add our IP to the list
        # of nodes that have joined the cluster at some point.
        #
        # Should this registration fail, later runs will ignore the fact
        # that a cluster was initialized, since our IP won't be in ZK.
        known_nodes = register_self()
        _dump_nodes_to_file(known_nodes, NODES_FILE_PATH)
        log.info("Successfully initialized cluster.")