    def on_leaving_cluster(self, cluster_view):

        # Cassandra commands need to be run in the signaling network
        # namespace in split network systems.
        #
        # This function means that there are now two ways of running a
        # command in the signaling namespace - this function, and the
        # namespace parameter to run_command.  This plugin does not have
        # access to the signaling namespace name, so we use this function
        # as a tactical workaround.
        def in_sig_namespace(command):
            prefix = "/usr/share/clearwater/bin/run-in-signaling-namespace "
            return prefix + command

        # We must remove the node from the cassandra cluster. Get the node's ID
        # from nodetool status, then remove it with nodetool remove
        output = ""
        try:
            status_command = "nodetool status | grep " + self._ip
            output = subprocess.check_output(in_sig_namespace(status_command),
                                             shell=True,
                                             stderr=subprocess.STDOUT)
            _log.debug(
                "Nodetool status succeeded and printed output {!r}".format(
                    output))
        except subprocess.CalledProcessError:  # pragma: no coverage
            _log.debug("nodetool status | grep returned non-zero - this node "
                       "may not be in the Cassandra cluster")

        if output != "":
            # Pull the UUID from the output
            for value in output.split():
                if "-" in value:
                    remove_command = "nodetool removenode " + value
                    run_command(in_sig_namespace(remove_command))
                    break
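These examples all drive run_command with either a shell string or an argument list, optionally pass a network namespace, and treat a non-zero return code as failure. The real Clearwater helper is not shown in this listing; a minimal sketch with that assumed interface (the namespace wrapping via ip netns exec is an assumption, the real helper may use a project-specific wrapper script) might look like:

# Hypothetical sketch of the run_command helper these examples assume:
# accepts a shell string or an argument list, optionally wraps the command
# in a network namespace, logs failures, and returns the exit code.
import logging
import subprocess

_log = logging.getLogger(__name__)

def run_command(command, namespace=None, log_error=True):
    if namespace:
        # Assumed namespace wrapping - not necessarily what Clearwater does.
        if isinstance(command, str):
            command = "ip netns exec {} {}".format(namespace, command)
        else:
            command = ["ip", "netns", "exec", namespace] + list(command)

    rc = subprocess.call(command, shell=isinstance(command, str))
    if rc != 0 and log_error:
        _log.error("Command %s failed with return code %d", command, rc)
    return rc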
    def on_config_changed(self, value, alarm):
        _log.info("Updating the shared iFC sets configuration file")

        if self.status(value) != FileStatus.UP_TO_DATE:
            safely_write(_file, value)
            run_command(["/usr/share/clearwater/bin/reload_shared_ifcs_xml"])
            alarm.update_file(_file)
Example #4
    def on_config_changed(self, value, alarm):
        _log.info("Updating {}".format(self._file))

        safely_write(self._file, value)

        run_command("service sprout reload")

        alarm.update_file(self._file)
Example #5
    def on_config_changed(self, value, alarm):
        _log.info("Updating {}".format(self._file))

        with open(self._file, "w") as ofile:
            ofile.write(value)

        run_command("service sprout reload")

        alarm.update_file(self._file)
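Example #5 writes the target file in place, so a crash or full disk mid-write can leave a truncated config behind. The other examples go through safely_write instead; a minimal sketch of what an atomic variant with the same call shape could do (an assumption, not the Clearwater implementation) is:

# Hypothetical atomic safely_write sketch: write to a temporary file next
# to the target, flush it to disk, then rename it over the original so a
# reader never observes a partially written file.
import os

def safely_write(path, contents, mode=0o644):
    tmp_path = path + ".tmp"
    with open(tmp_path, "w") as f:
        f.write(contents)
        f.flush()
        os.fsync(f.fileno())
    os.chmod(tmp_path, mode)
    os.rename(tmp_path, path)  # atomic on POSIX filesystems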
Example #6
    def on_config_changed(self, value, alarm):
        _log.info("Updating shared configuration file")

        if self.status(value) != FileStatus.UP_TO_DATE:
            safely_write(_file, value)
            if value != _default_value:
                run_command(
                    "/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue add apply_config"
                )
    def on_config_changed(self, value, alarm):
        _log.info("Updating Chronos shared configuration file")

        if self.status(value) != FileStatus.UP_TO_DATE:
            safely_write(_file, value)
            run_command([
                "/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue",
                "add", "apply_chronos_shared_config"
            ])
            alarm.update_file(_file)
Example #8
    def on_stable_cluster(self, cluster_view):  # pragma: no cover
        _log.debug("Clearing Cassandra not-clustered alarm")
        self._clustering_alarm.clear()
        pdlogs.STABLE_CLUSTER.log(cluster_desc=self.cluster_description())

        if (self._ip == sorted(cluster_view.keys())[0]):
            _log.debug("Adding schemas")
            run_command(
                "/usr/share/clearwater/infrastructure/scripts/cassandra_schemas/run_cassandra_schemas"
            )
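Comparing this node's IP against the first entry of the sorted cluster view is a cheap way to pick exactly one node to run the schema scripts: every member sorts the same shared view and reaches the same answer. A small illustration with made-up addresses:

# Illustration only (hypothetical addresses): every node derives the same
# "leader" from the shared cluster view, so only one runs the schemas.
cluster_view = {"10.0.0.3": "normal", "10.0.0.1": "normal", "10.0.0.2": "normal"}
leader = sorted(cluster_view.keys())[0]  # "10.0.0.1" on every node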
    def at_front_of_queue(self):
        _log.info("Restarting Chronos")
        if run_command("service chronos stop"):
            _log.warning("Unable to stop Chronos successfully")
        if run_command("service chronos wait-sync"):
            _log.warning("Unable to resync Chronos successfully")
        if run_command(
                "/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue remove_success apply_chronos_gr_config"
        ):
            _log.warning("Unable to remove this node from the resync queue")
        _log.info("Chronos restarted")
Example #10
    def on_config_changed(self, value, alarm):
        _log.info("Updating SAS configuration file")

        if self.status(value) != FileStatus.UP_TO_DATE:
            safely_write(_file, value)

            run_command(["/usr/share/clearwater/infrastructure/scripts/sas_socket_factory"])
            apply_config_key = subprocess.check_output(["/usr/share/clearwater/clearwater-queue-manager/scripts/get_apply_config_key"])
            run_command(["/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue",
                         "add", apply_config_key])

            alarm.update_file(_file)
    def wait_for_cassandra(self):
        # Don't start Cassandra, just rely on monit or supervisord to start it
        # - this avoids race conditions where both we and monit start it at the
        # same time and two copies start up.

        _log.info("Waiting for Cassandra to come up...")
        # Wait until we can connect on port 9160 - i.e. Cassandra is running.
        attempts = 0
        while not self.can_contact_cassandra():  # pragma: no cover
            # Sleep so we don't tight loop
            time.sleep(1)
            attempts += 1
            if ((attempts % 10) == 0):
                _log.info("Still waiting for Cassandra to come up...")

        _log.info("Finished waiting for Cassandra to come up")
        # Restart clearwater-infrastructure so any necessary schema creation
        # scripts get run
        run_command(["sudo", "service", "clearwater-infrastructure", "restart"])
    def leave_cassandra_cluster(self):
        # We need Cassandra to be running so that we can connect on port 9160 and
        # decommission it. Check if we can connect on port 9160.
        if not self.can_contact_cassandra():  # pragma: no cover
            self.wait_for_cassandra()

        # Remove the cassandra.yaml file first - Cassandra won't start up while
        # it's missing, so this prevents monit or supervisord from
        # auto-restarting it after decommissioning.
        if os.path.exists(self.CASSANDRA_YAML_FILE):
            os.remove(self.CASSANDRA_YAML_FILE)

        run_command(["nodetool", "decommission"], self._sig_namespace)

        # Remove the bootstrapping flags so that we bootstrap correctly
        # if rejoining the cluster again in future.
        if os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG):
            os.remove(self.BOOTSTRAP_IN_PROGRESS_FLAG)
        if os.path.exists(self.BOOTSTRAPPED_FLAG):
            os.remove(self.BOOTSTRAPPED_FLAG)
    def remove_node(self):   # pragma: no cover
        output = ""
        try:
            args = ["/usr/share/clearwater/bin/run-in-signaling-namespace",
                    "nodetool", "status"]
            process_nodetool = subprocess.Popen(args, stdout=subprocess.PIPE)
            process_grep = subprocess.Popen(['grep', self._ip],
                    stdin=process_nodetool.stdout, stdout=subprocess.PIPE)

            process_nodetool.stdout.close()
            output = process_grep.communicate()[0]
            _log.debug("Nodetool status succeeded and printed output {!r}".
                       format(output))
        except subprocess.CalledProcessError:  # pragma: no coverage
            _log.debug("nodetool status | grep returned non-zero - this node "
                       "may not be in the Cassandra cluster")

        if output != "":
            # Pull the UUID from the output
            for value in output.split():
                if "-" in value:
                    remove_command = ["/usr/share/clearwater/bin/run-in-signaling-namespace", "nodetool", "removenode", value]
                    run_command(remove_command)
                    break
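Both removal paths pick the host ID by taking the first whitespace-separated token that contains a dash, which relies on the nodetool status layout never putting a dash in an earlier column. A stricter (hypothetical) extraction could match a full UUID instead:

# Hypothetical stricter host-ID extraction from a nodetool status line.
import re

_UUID_RE = re.compile(
    r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.I)

def _extract_host_id(nodetool_line):
    match = _UUID_RE.search(nodetool_line)
    return match.group(0) if match else None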
    def on_config_changed(self, value, alarm):
        if os.path.exists(_file) and not os.path.exists(_file + ".apply"):
            _log.debug("Ignoring shared config change - Shared config already learnt")
            return

        _log.info("Updating shared configuration")
        with open(_file + ".tmp", "w") as ofile:
            ofile.write(value)
        shutil.move(_file + ".tmp", _file)

        _log.info("Restarting services")
        run_command("service clearwater-infrastructure restart")
        for service, command in services.iteritems():
            if os.path.exists("/etc/init.d/" + service):
                run_command("service {} {}".format(service, command))

        # Config file is now up-to-date
        alarm.update_file(_file)

        # Remove the apply file if present.
        try:
            os.remove(_file + ".apply")
        except OSError:
            pass
    def add_cassandra_schemas(self):  # pragma: no cover
        # Adding the schemas can fail when Cassandra has just started. If it
        # does fail, simply try again until it succeeds.
        _log.info("Trying to add/update the Cassandra schemas...")
        attempts = 0

        while True:
            rc = run_command(["/usr/share/clearwater/infrastructure/scripts/cassandra_schemas/run_cassandra_schemas"])

            if rc == 0:
                break

            time.sleep(1)
            attempts += 1

            if ((attempts % 10) == 0):
                _log.info("Still trying to add/update the Cassandra schemas...")

        _log.info("Finished adding/updating the schemas")
Example #16
    def at_front_of_queue(self):
        _log.info("Restarting clearwater-infrastructure")
        run_command("service clearwater-infrastructure restart")

        if os.path.exists("/usr/share/clearwater/infrastructure/scripts/restart"):
            _log.info("Restarting services")
            for restart_script in os.listdir("/usr/share/clearwater/infrastructure/scripts/restart"):
                run_command("/usr/share/clearwater/infrastructure/scripts/restart/" + restart_script)
 
        if self._wait_plugin_complete != "N":
            _log.info("Checking service health")
            if run_command("/usr/share/clearwater/clearwater-queue-manager/scripts/check_node_health.py"):
                _log.info("Services failed to restart successfully")
                run_command("/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue remove_failure apply_config")
            else:
                _log.info("Services restarted successfully")
                run_command("/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue remove_success apply_config")
        else:
            _log.info("Not checking service health")
            run_command("/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue remove_success apply_config")
    def write_new_cassandra_config(self, seeds_list, destructive_restart=False):
        seeds_list_str = ','.join(map(str, seeds_list))
        _log.info("Cassandra seeds list is {}".format(seeds_list_str))

        # Read cassandra.yaml template.
        with open(self.CASSANDRA_YAML_TEMPLATE) as f:
            doc = yaml.safe_load(f)

        # Fill in the correct listen_address and seeds values in the yaml
        # document.
        doc["listen_address"] = self._ip
        doc["broadcast_rpc_address"] = self._ip

        doc["seed_provider"][0]["parameters"][0]["seeds"] = seeds_list_str
        doc["endpoint_snitch"] = "GossipingPropertyFileSnitch"

        # We use Thrift timeouts of 250ms, and we need the Cassandra timeouts to
        # be able to time out before that, including inter-node latency, so we
        # set timeouts of 190ms for reads, range-reads and writes
        doc["read_request_timeout_in_ms"] = 190
        doc["range_request_timeout_in_ms"] = 190
        doc["write_request_timeout_in_ms"] = 190

        # Commit logs.  We want to cap these, as the default of 8GB is sufficient
        # to exhaust the root filesystem on a low-spec (20GB) node, but we should
        # allow higher spec machines to use more diskspace to avoid thrashing.
        #
        # Therefore, set the upper threshold for commit logs to be 1GB per core
        # (up to the maximum for a 64bit machine - namely 8192).
        #
        # We ignore security analysis here, as although we are shelling out,
        # we are doing so with a fixed command, so it's safe to do so. For
        # safety, we always force the result to be an integer.
        get_core_count = "grep processor /proc/cpuinfo | wc -l"
        core_count = subprocess.check_output(get_core_count, # nosec
                                             shell=True,
                                             stderr=subprocess.STDOUT)

        try:
            core_count_int = int(core_count)
        except ValueError:  #  pragma: no cover
            core_count_int = 2

        doc["commitlog_total_space_in_mb"] = min(core_count_int * 1024, 8192)

        contents = WARNING_HEADER + "\n" + yaml.dump(doc)
        topology = WARNING_HEADER + "\n" + "dc={}\nrack=RAC1\n".format(self._local_site)

        # Restart Cassandra and make sure it picks up the new list of seeds.
        _log.info("Restarting Cassandra")

        # Remove the cassandra.yaml file first - Cassandra won't start up while
        # it's missing, so this keeps it stopped while we're clearing out its
        # database
        if os.path.exists(self.CASSANDRA_YAML_FILE):
            os.remove(self.CASSANDRA_YAML_FILE)

        # Stop Cassandra directly rather than going through any 'service'
        # commands - this should mean that supervisord keeps restarting
        # Cassandra when running in Docker.
        #
        # Note that we can't use the init.d script here, because cassandra.yaml
        # doesn't exist so it immediately exits.
        #
        # We do not want to kill cassandra if it is in the process of bootstrapping
        if not os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG):
            run_command(["start-stop-daemon", "-K", "-p",
                "/var/run/cassandra/cassandra.pid", "-R", "TERM/30/KILL/5"])
            _log.info("Stopped Cassandra while changing config files")

        # We only want to perform these steps the first time we join a cluster
        # If we are bootstrapping, or already bootstrapped, doing this will leave
        # us unable to rejoin the cluster properly
        if ((destructive_restart) and not
            ((os.path.exists(self.BOOTSTRAPPED_FLAG) or
             (os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG))))):
            _log.warn("Deleting /var/lib/cassandra - this is normal on initial clustering")
            run_command(["rm", "-rf", "/var/lib/cassandra/"])
            run_command(["mkdir", "-m", "755", "/var/lib/cassandra"])
            run_command(["chown", "-R", "cassandra", "/var/lib/cassandra"])

            # Set a state flag if we have performed a destructive restart, and not yet
            # completed bootstrapping. This will stop us re-deleting the data directory
            # if the cluster_manager dies, ensuring we cluster correctly.
            open(self.BOOTSTRAP_IN_PROGRESS_FLAG, 'a').close()

        # Write back to cassandra.yaml - this allows Cassandra to start again.
        safely_write(self.CASSANDRA_TOPOLOGY_FILE, topology)
        safely_write(self.CASSANDRA_YAML_FILE, contents)

        self.wait_for_cassandra()

        # If we were previously bootstrapping, alter the state flag to indicate
        # the process is complete. We will remove this when we leave the cluster
        if os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG):
            os.rename(self.BOOTSTRAP_IN_PROGRESS_FLAG,
                      self.BOOTSTRAPPED_FLAG)

        if os.path.exists("/etc/clearwater/force_cassandra_yaml_refresh"):
            os.remove("/etc/clearwater/force_cassandra_yaml_refresh")
    def on_cluster_changing(self, cluster_view):
        write_memcached_cluster_settings("/etc/clearwater/cluster_settings",
                                         cluster_view)
        run_command("/usr/share/clearwater/bin/reload_memcached_users")
Example #19
    def write_new_cassandra_config(self,
                                   seeds_list,
                                   destructive_restart=False):
        seeds_list_str = ','.join(map(str, seeds_list))
        _log.info("Cassandra seeds list is {}".format(seeds_list_str))

        # Read cassandra.yaml template.
        with open(self.CASSANDRA_YAML_TEMPLATE) as f:
            doc = yaml.safe_load(f)

        # Fill in the correct listen_address and seeds values in the yaml
        # document.
        doc["listen_address"] = self._ip
        doc["broadcast_rpc_address"] = self._ip

        doc["seed_provider"][0]["parameters"][0]["seeds"] = seeds_list_str
        doc["endpoint_snitch"] = "GossipingPropertyFileSnitch"

        # Work out the timeout from the target_latency_us value (assuming
        # 100000 if it isn't set)
        get_latency_cmd = "target_latency_us=100000; . /etc/clearwater/config; echo -n $target_latency_us"
        latency = subprocess.check_output(get_latency_cmd,
                                          shell=True,
                                          stderr=subprocess.STDOUT)

        try:
            # We want the timeout value to be 4/5ths the maximum acceptable time
            # of a HTTP request (which is 5 * target latency)
            timeout = (int(latency) / 1000) * 4
        except ValueError:  #  pragma: no cover
            timeout = 400

        doc["read_request_timeout_in_ms"] = timeout

        contents = WARNING_HEADER + "\n" + yaml.dump(doc)
        topology = WARNING_HEADER + "\n" + "dc={}\nrack=RAC1\n".format(
            self._local_site)

        # Restart Cassandra and make sure it picks up the new list of seeds.
        _log.info("Restarting Cassandra")

        # Remove the cassandra.yaml file first - Cassandra won't start up while
        # it's missing, so this keeps it stopped while we're clearing out its
        # database
        if os.path.exists(self.CASSANDRA_YAML_FILE):
            os.remove(self.CASSANDRA_YAML_FILE)

        # Stop Cassandra directly rather than going through any 'service'
        # commands - this should mean that supervisord keeps restarting
        # Cassandra when running in Docker.
        #
        # Note that we can't use the init.d script here, because cassandra.yaml
        # doesn't exist so it immediately exits.
        #
        # We do not want to kill cassandra if it is in the process of bootstrapping
        if not os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG):
            run_command(
                "start-stop-daemon -K -p /var/run/cassandra/cassandra.pid -R TERM/30/KILL/5"
            )
            _log.info("Stopped Cassandra while changing config files")

        # We only want to perform these steps the first time we join a cluster
        # If we are bootstrapping, or already bootstrapped, doing this will leave
        # us unable to rejoin the cluster properly
        if ((destructive_restart)
                and not ((os.path.exists(self.BOOTSTRAPPED_FLAG) or
                          (os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG))))):
            _log.warn(
                "Deleting /var/lib/cassandra - this is normal on initial clustering"
            )
            run_command("rm -rf /var/lib/cassandra/")
            run_command("mkdir -m 755 /var/lib/cassandra")
            run_command("chown -R cassandra /var/lib/cassandra")

            # Set a state flag if we have performed a destructive restart, and not yet
            # completed bootstrapping. This will stop us re-deleting the data directory
            # if the cluster_manager dies, ensuring we cluster correctly.
            open(self.BOOTSTRAP_IN_PROGRESS_FLAG, 'a').close()

        # Write back to cassandra.yaml - this allows Cassandra to start again.
        safely_write(self.CASSANDRA_TOPOLOGY_FILE, topology)
        safely_write(self.CASSANDRA_YAML_FILE, contents)

        self.wait_for_cassandra()

        # If we were previously bootstrapping, alter the state flag to indicate
        # the process is complete. We will remove this when we leave the cluster
        if os.path.exists(self.BOOTSTRAP_IN_PROGRESS_FLAG):
            os.rename(self.BOOTSTRAP_IN_PROGRESS_FLAG, self.BOOTSTRAPPED_FLAG)

        if os.path.exists("/etc/clearwater/force_cassandra_yaml_refresh"):
            os.remove("/etc/clearwater/force_cassandra_yaml_refresh")
Example #20
    def write_cluster_settings(self, cluster_view):
        if self._remote_site != "":
            write_memcached_cluster_settings(
                "/etc/clearwater/remote_cluster_settings", cluster_view)
            run_command("/usr/share/clearwater/bin/reload_memcached_users")
    def on_new_cluster_config_ready(self, cluster_view):
        run_command("service astaire reload")
        run_command("service astaire wait-sync")
Example #22
    def on_cluster_changing(self, cluster_view):
        write_chronos_cluster_settings("/etc/chronos/chronos_cluster.conf",
                                       cluster_view,
                                       self.local_server)
        run_command("service chronos reload")
Example #23
    def on_new_cluster_config_ready(self, cluster_view):
        run_command("service chronos resync")
        run_command("service chronos wait-sync")
Example #24
    def on_new_cluster_config_ready(self, cluster_view):  # pragma: no cover
        self._alarm.set()
        run_command("service chronos resync")
        run_command("service chronos wait-sync")
    def write_cluster_settings(self, cluster_view):
        write_chronos_cluster_settings("/etc/chronos/chronos_cluster.conf",
                                       cluster_view, self.local_server,
                                       self.instance_id, self.deployment_id)
        run_command(["service", "chronos", "reload"])
    def can_contact_cassandra(self):
        rc = run_command(["/usr/share/clearwater/bin/poll_cassandra.sh",
                          "--no-grace-period"], log_error=False)
        return (rc == 0)
    def on_new_cluster_config_ready(self, cluster_view):  # pragma: no cover
        self._alarm.set()
        run_command("service astaire reload")
        run_command("service astaire wait-sync")
    def on_new_cluster_config_ready(self, cluster_view):  # pragma: no cover
        self._alarm.set()
        run_command(["service", "astaire", "reload"])
        run_command(["service", "astaire", "wait-sync"])
    def write_cluster_settings(self, cluster_view):
        write_memcached_cluster_settings("/etc/clearwater/cluster_settings",
                                         cluster_view)
        run_command(["/usr/share/clearwater/bin/reload_memcached_users"])
    def on_config_changed(self, value, alarm):
        _log.info("Updating dns configuration file")

        if self.status(value) != FileStatus.UP_TO_DATE:
            safely_write(_file, value)
            run_command("/usr/share/clearwater/bin/reload_dns_config")