예제 #1
0
    def _validate_ring(self, instance):
        """
        Run nodetool to verify that a ring is valid.

        Executes ``nodetool ring`` against the instance's private DNS name and
        inspects each node row of the output.

        Returns:
            The command's own return code if nodetool itself failed;
            200 if nodetool exited cleanly but at least one node is not both
            "Up" and "Normal"; 0 if every inspected node is healthy.
        """

        ring_output = exec_command("nodetool --host %s ring" % instance.private_dns_name)

        if ring_output.failed:
            return ring_output.return_code

        # some nodes can be down, but nodetool will still exit cleanly,
        # so doing some extra validation to ensure that all nodes of
        # the ring are "Up" and "Normal" and manually set a bad return
        # code otherwise
        retcode = 0

        # The first three lines are skipped as non-node rows (presumably the
        # header of the nodetool output -- confirm against the nodetool
        # version in use).
        for node in ring_output.splitlines()[3:]:
            fields = node.split()

            # Whitespace-separated columns read below: 0 = host, 3 = status,
            # 4 = state.  Blank or short rows cannot describe a node, so they
            # are skipped instead of raising IndexError.
            if len(fields) < 5:
                continue

            host, status, state = fields[0], fields[3], fields[4]

            self.logger.debug("Node %s is %s and %s" % (host, status, state))

            # A node is unhealthy when EITHER condition fails; requiring both
            # to fail would let a Down/Normal or Up/Leaving node pass.
            if status.lower() != "up" or state.lower() != "normal":
                self.logger.debug("Node %s ring is not healthy" % host)
                self.logger.debug("Ring status:")
                self.logger.debug(ring_output)
                retcode = 200

        return retcode
예제 #2
0
    def start_cassandra(self, instances=None, print_ring=True, retry=False):
        """Start Cassandra services on instances.
        To validate that Cassandra is running, this will check the output of
        nodetool ring, make sure that gossip and thrift are running, and check
        that nodetool info reports Normal mode.  If these tests do not pass
        within the timeout threshold, it will retry up to
        self.MAX_RESTART_ATTEMPTS times to restart.  If after meeting the max
        allowed, it will raise a TimeoutException.
        """

        if retry:
            self.logger.info("Attempting to start again (%s of %s)" % (self.current_attempt-1, self.MAX_RESTART_ATTEMPTS))
            print("Cassandra failed to start - attempting to start again (%s of %s)" % (self.current_attempt-1, self.MAX_RESTART_ATTEMPTS))

        if instances is None:
            instances = self.get_instances()

        for instance in instances:
            with settings(host_string=instance.public_dns_name, warn_only=True): #, hide("everything"):
                errors = -1
                self.logger.info("Starting Cassandra service on %s..." % instance.id)

                while True:
                    try:
                        # check to see if cassandra is running

                        if self.is_running(instance):
                            self.logger.info("Cassandra is running.")
                            break

                        # start it if this is the first time
                        if errors < 0:
                            self.logger.info("Cassandra is not running. Attempting to start now...")
                            print("Cassandra is not running. Attempting to start now...")
                            exec_command("service cassandra start", pty=False)
                        elif errors >= 5:
                            #tail = sudo("tail -n 50 /var/log/cassandra/output.log")
                            #self.logger.error(tail)
                            raise RuntimeError("Unable to start cassandra. Check the logs for more information.")
                        self.logger.info("Error detecting Cassandra status...will try again in 3 seconds.")
                        errors += 1
                        time.sleep(3)

                    except SystemExit, e:
                        self.logger.error(str(e))
예제 #3
0
    def _run_nodetool(self, ntcommand, instance=None):
        """Run a nodetool sub-command on an instance and return its output.

        ntcommand is appended verbatim to ``nodetool -h <private dns name>``.
        When no instance is given, the first entry of ``self.get_instances()``
        is used.  The command runs with ``warn_only=True`` and all Fabric
        output hidden; whatever ``exec_command`` returns is passed back.
        """
        target = self.get_instances()[0] if instance is None else instance

        self.logger.debug("running nodetool on instance %s", target.id)

        connection = settings(host_string=target.public_dns_name, warn_only=True)
        with connection, hide("everything"):
            return exec_command("nodetool -h %s %s" % (target.private_dns_name, ntcommand))
예제 #4
0
    def stop_cassandra(self, instances=None):
        """Stop the Cassandra service on every given instance.

        When instances is None, the list from ``self.get_instances()`` is
        used.  Each stop command runs with ``warn_only=True`` and Fabric
        output hidden; its result is written to the info log.
        """
        targets = self.get_instances() if instances is None else instances

        for node in targets:
            self.logger.info("Stopping Cassandra on %s" % node.id)
            with settings(host_string=node.public_dns_name, warn_only=True), hide("everything"):
                self.logger.info(exec_command("service cassandra stop"))

        self.logger.debug("Shutdown complete.")
예제 #5
0
    def _discover_ring(self, instance=None):
        """Query nodetool for the ring and return its parsed node entries.

        When no instance is given, the first entry of ``self.get_instances()``
        is used.  The Cassandra service status is checked first; then the
        output of ``nodetool ring`` is split into lines, the first two lines
        are dropped, and every remaining line is passed to ``parse_nodeline``.

        Returns:
            A list with one ``parse_nodeline`` result per remaining line.

        Raises:
            RuntimeError: if ``service cassandra status`` reports failure.
            AssertionError: if no lines remain after dropping the first two.
        """
        if instance is None:
            instance = self.get_instances()[0]

        with settings(host_string=instance.public_dns_name, warn_only=True), hide("everything"):
            status = exec_command("service cassandra status")

            if status.failed:
                # RuntimeError is the builtin; "RuntimeException" is not a
                # Python name and would itself fail with NameError.
                raise RuntimeError("Cassandra does not appear to be running.")

            self.logger.debug("Discovering ring...")
            # _run_nodetool returns the command output as a single value, so
            # it must not be unpacked into a (retcode, output) pair.
            output = self._run_nodetool("ring", instance)
            self.logger.debug("node tool output:\n%s" % output)
            lines = output.split("\n")[2:]

            # Raised explicitly (rather than via ``assert``) so the check is
            # not stripped when Python runs with -O; the exception type is
            # kept for existing callers.
            if not lines:
                raise AssertionError("Ring output must have more than two lines.")

            self.logger.debug("Found %d nodes" % len(lines))

            return [parse_nodeline(line) for line in lines]
예제 #6
0
    def _configure_cassandra_instance(self, instance, seed_ips, token, set_tokens=True, auto_bootstrap=False):
        """Prepare data/log directories and rewrite cassandra.yaml on an instance.

        Creates ``/mnt/cassandra-data`` and ``/mnt/cassandra-logs`` on the
        instance and hands them to the ``cassandra`` user, downloads the
        instance's ``conf/cassandra.yaml`` to ``/tmp``, updates it, and
        uploads it back to the same remote path.

        Args:
            instance: instance to configure; its ``id``, ``public_dns_name``
                and ``private_dns_name`` attributes are read.
            seed_ips: iterable of strings, joined with "," into the seed list.
            token: value stored as ``initial_token`` when set_tokens is True.
            set_tokens: ``initial_token`` is written only when this is
                exactly ``True``.
            auto_bootstrap: when truthy, ``auto_bootstrap`` is set to 'true'.
        """
        self.logger.debug("Configuring %s..." % instance.id)
        yaml_file = os.path.join("/tmp", "cassandra.yaml")
        cassandra_home = self.get_cassandra_home(instance)

        self.logger.debug("Local cassandra.yaml file: %s" % yaml_file)
        with settings(host_string=instance.public_dns_name, warn_only=True): #, hide("everything"):

            cassandra_data = os.path.join("/mnt", "cassandra-data")
            cassandra_logs = os.path.join("/mnt", "cassandra-logs")

            # create directories and log files
            exec_command("mkdir -p %s" % cassandra_data)
            exec_command("mkdir -p %s" % cassandra_logs)

            # set permissions
            exec_command("chown -R cassandra:cassandra %s %s" % (cassandra_data, cassandra_logs))

            # NOTE: the previous ``except SystemExit`` handler only re-raised,
            # so SystemExit still propagates to the caller unchanged.
            remote_yaml = os.path.join(cassandra_home, "conf", "cassandra.yaml")

            # get yaml file
            get(remote_yaml, "/tmp")

            # modify it; ``with`` closes the handle even if parsing fails
            with open(yaml_file) as f:
                config = parse_yaml(f)

            config['seed_provider'][0]['parameters'][0]['seeds'] = ",".join(seed_ips)
            if set_tokens is True:
                config['initial_token'] = token
            if auto_bootstrap:
                config['auto_bootstrap'] = 'true'
            config['data_file_directories'] = [cassandra_data]
            config['commitlog_directory'] = cassandra_logs
            config['listen_address'] = str(instance.private_dns_name)
            config['rpc_address'] = str(instance.public_dns_name)

            with open(yaml_file, "w") as f:
                f.write(dump_yaml(config))

            # put modified yaml file
            put(yaml_file, remote_yaml, use_sudo=use_sudo())
예제 #7
0
 def get_cassandra_home(self, instance):
     """Return the output of ``echo $CASSANDRA_HOME`` run on the instance.

     The command is executed against the instance's public DNS name with
     ``warn_only=True``.
     """
     connection = settings(host_string=instance.public_dns_name, warn_only=True)
     with connection:
         cassandra_home = exec_command("echo $CASSANDRA_HOME")
     return cassandra_home
예제 #8
0
 def is_running(self, instance):
     """Report whether ``service cassandra status`` says Cassandra is running.

     Returns True when the command output contains the text "is running",
     False otherwise.  Fabric output is hidden while the command runs.
     """
     with settings(host_string=instance.public_dns_name), hide("everything"):
         status_output = exec_command("service cassandra status")
     return "is running" in status_output
예제 #9
0
 def get_cassandra_pid(self, instance):
     """Return the contents of /var/run/cassandra.pid from the instance.

     Returns None when reading the pid file fails; otherwise the command
     result is returned as-is.
     """
     with settings(host_string=instance.public_dns_name, warn_only=True):
         result = exec_command("cat /var/run/cassandra.pid")
     return None if result.failed else result
예제 #10
0
                    except SystemExit, e:
                        self.logger.error(str(e))

        # test connection
        self.logger.debug("Testing connection to each Cassandra instance...")

        temp_instances = instances[:]
        while len(temp_instances) > 0:
            instance = temp_instances[-1]

            with settings(host_string=instance.public_dns_name, warn_only=True), hide("everything"):
                # does the ring look ok?
                ring_retcode = self._validate_ring(instance)

                # is gossip running?
                gossip_retcode = exec_command("nodetool -h %s info | grep Gossip | grep true" % instance.private_dns_name).return_code

                # are the netstats looking ok?
                netstats_retcode = exec_command("nodetool -h %s netstats | grep 'Mode: NORMAL'" % instance.private_dns_name).return_code

                # is thrift running?
                thrift_retcode = exec_command("/bin/netstat -an | grep 9160").return_code

                if ring_retcode == 0 and gossip_retcode == 0 and netstats_retcode == 0 and thrift_retcode == 0:
                    temp_instances.pop()
                else:
                    if ring_retcode != 0:
                        self.logger.warn("Return code for 'nodetool ring' on '%s': %d" % (temp_instances[-1].id, ring_retcode))
                    if gossip_retcode != 0:
                        self.logger.warn("Return code for 'nodetool info | grep Gossip' on '%s': %d" % (temp_instances[-1].id, gossip_retcode))
                    if netstats_retcode != 0: