Ejemplo n.º 1
0
 def node_reboot(self, ip):
     """
     Run 'sudo reboot now' on the node at `ip` through rpc.

     :param ip: address of the node to reboot.
     :return: None; whatever rpc returns is discarded.
     """
     reboot_cmd = 'sudo reboot now'
     rpc_timeout = 2 * 60  # Same value as 60 * 2; presumably seconds -- confirm against rpc.
     rpc(ip, reboot_cmd, self.username, self.password, self.key, timeout=rpc_timeout)
Ejemplo n.º 2
0
    def db_add_random_node(self, wait_time_min=0, randomness_time_injection=0):
        """
        Take a randomly chosen cassandra node off the network and have it return after a delay.

        The chosen node is sent a background command that brings eth0 down, sleeps for the
        computed delay, brings eth0 back up and reboots. The node at the next index
        (wrapping around) is then asked to run `nodetool removenode` for the chosen ip.

        :param wait_time_min: fixed part of the delay, in minutes; must be >= 0.
        :param randomness_time_injection: upper bound, in seconds, of the random extra delay; must be >= 0.
        :return: None
        """
        assert wait_time_min >= 0
        assert randomness_time_injection >= 0

        cluster = self.cassandra
        chosen_index = pick_x_different_num(1, 0, len(cluster.ips) - 1)[0]
        target_ip = cluster.ips[chosen_index]
        jitter_seconds = random.randint(0, randomness_time_injection)
        delay_seconds = wait_time_min * 60 + jitter_seconds

        add_test_note('Chose {%s} node to be added after %s seconds.' % (target_ip, delay_seconds))

        # Disable eth0, then reboot at the end to simulate failure; the command is backgrounded on the node.
        failure_cmd = '(nohup sudo ifdown eth0; sleep %s ; sudo ifup eth0 ; sudo reboot now) > /tmp/datos_failure.log &' % (
            delay_seconds)
        rpc(target_ip, failure_cmd, cluster.username, cluster.password, cluster.key)

        # Ask the neighbouring node to drop the chosen node from the ring.
        neighbour_ip = cluster.ips[(chosen_index + 1) % len(cluster.ips)]
        removal_cmd = 'nodetool removenode %s' % target_ip
        rpc(neighbour_ip, removal_cmd, cluster.username, cluster.password, cluster.key)
Ejemplo n.º 3
0
 def db_start(self, ip):
     """
     Start the cassandra service on the node at `ip` by running
     'sudo service cassandra start' through rpc.

     :param ip: address of the node whose service is started.
     :return: None; rpc's return value is not used.
     """
     start_cmd = 'sudo service cassandra start'
     rpc(ip, start_cmd, self.username, self.password, self.key, timeout=120)  # 120 == 60 * 2
Ejemplo n.º 4
0
 def node_shutdown(self, ip):
     """
     Run 'sudo halt' on the node at `ip` through rpc.

     :param ip: address of the node to halt.
     :return: None; rpc's return value is not used.
     """
     halt_cmd = 'sudo halt'
     rpc_timeout = 2 * 60  # Same value as 60 * 2; presumably seconds -- confirm against rpc.
     rpc(ip, halt_cmd, self.username, self.password, self.key, timeout=rpc_timeout)
Ejemplo n.º 5
0
        def delta_worker():
            """
            Restart the delta population on workload_ip every LOOP_MIN minutes for as long
            as self.do_delta_population stays truthy.

            Reads workload_ip, cmd1, cmd2, mgmt_object and LOOP_MIN from the enclosing scope.
            """
            while True:
                if not self.do_delta_population:
                    break

                # cmd1 stops populations left over from the previous cycle.
                rpc(workload_ip, cmd1, self.username, self.password, self.key)
                time.sleep(2)

                # cmd2 starts the new batch; no tty so it can run in the background and we can disconnect.
                rpc(workload_ip, cmd2, self.username, self.password, self.key, no_tty=True)
                status_line = '{%s} delta population set on node %s.' % (mgmt_object, workload_ip)
                report(status_line)

                # Give the delta LOOP_MIN minutes to complete and settle before cycling again.
                cycle_seconds = 60 * LOOP_MIN
                time.sleep(cycle_seconds)
Ejemplo n.º 6
0
 def shutdown(self):
     """
     Shutdown the whole db cluster.

     Runs 'sudo service cassandra stop' on every ip in self.ips, one node after another.
     """
     stop_cmd = 'sudo service cassandra stop'
     for node_ip in self.ips:
         rpc(node_ip, stop_cmd, self.username, self.password, self.key, timeout=120)  # 120 == 60 * 2
Ejemplo n.º 7
0
 def cfstats(self):
     """
     Return the output of `nodetool cfstats` from the first node in self.ips.

     Although written as a loop, the body returns on its first pass, so only the
     first node is ever queried. Returns None when self.ips is empty.
     """
     for node_ip in self.ips:
         stats_output, _ignored = rpc(node_ip, "nodetool cfstats", self.username, self.password, self.key)
         # TODO: (Aaron) finish ...
         return stats_output
     return None
Ejemplo n.º 8
0
    def _deliver_payload(self):
        """
        Delivers population scripts and other goodies to the cassandra source cluster. Most stored in ~/.geppetto/

        For every node in self.ips: make sure ~/.geppetto/common exists and holds an
        __init__.py, copy common.py into it, then copy data_population.py and the schema
        folder into ~/.geppetto/. The existing ~/.geppetto directory is not removed first.

        :return: True, after self.payload has been set to True.
        """
        install_dir = common.common.global_vars['geppetto_install_dir']
        local_common = '%s/common/common.py' % install_dir
        local_population = '%s/db_utils/cassandra_utils/data_population.py' % install_dir
        local_schema_dir = '%s/db_utils/cassandra_utils/schema' % install_dir
        prepare_cmds = ('mkdir -p ~/.geppetto/common', 'touch ~/.geppetto/common/__init__.py')

        for node_ip in self.ips:
            report('Updating Geppetto payload on {%s}.' % node_ip)
            remote_root = '%s@%s:~/.geppetto/' % (self.username, node_ip)

            # Create the remote package directory before anything is copied into it.
            for prepare_cmd in prepare_cmds:
                rpc(node_ip, prepare_cmd, self.username, self.password, self.key, suppress_output=True)

            scp(local_common, '%s/common/' % remote_root, self.password, self.key, suppress_output=True)
            scp(local_population, remote_root, self.password, self.key, suppress_output=True)
            scp(local_schema_dir, remote_root, self.password, self.key, is_dir=True, suppress_output=True)

        self.payload = True
        return True
Ejemplo n.º 9
0
 def nodetool_status(self):
     """
     Return the first `nodetool status` output that contains a node state code.

     Each ip in self.source_params['ips'] is queried in turn. The output is filtered
     on the remote side with grep so that only lines containing one of the two-letter
     state codes (UN, UL, UJ, UM, DN, DL, DJ, DM) or '====' remain. If no node yields
     a state code the operator is prompted; answering 'r' retries the whole sweep.

     :return: the filtered nodetool output, or None when no status was obtained and
              the operator did not ask for a retry.
     """
     # Raw string: the backslashes belong to grep's alternation syntax, not to Python.
     status_cmd = r"nodetool status | grep 'UN\|UL\|UJ\|UM\|DN\|DL\|DJ\|DM\|===='"
     state_codes = ('UN', 'UL', 'UJ', 'UM', 'DN', 'DL', 'DJ', 'DM')

     # Cycle through the nodes until we get a result from nodetool status.
     for ip in self.source_params['ips']:
         out, _err = rpc(ip, status_cmd, self.username, self.password, self.key, suppress_output=True)
         if any(code in out for code in state_codes):
             return out

     response = pause_execution_for_input('No status received from Cassandra Nodetool', level='info')
     if response == 'r':
         # Hand the retry's result back to the caller instead of discarding it.
         return self.nodetool_status()
     return None
Ejemplo n.º 10
0
 def get_compaction_history(self):
     """
     Return the output of `nodetool compactionhistory` from the first node that answers.

     Nodes in self.ips are tried in order; a node whose rpc call raises is skipped.

     :return: the command output, or '' when self.ips is empty or every node failed.
     """
     cmd = 'nodetool compactionhistory'
     for ip in self.ips:
         try:
             # rpc is given the node, the command and the login credentials.
             out, _err = rpc(ip, cmd, self.username, self.password, self.key)
         except Exception:
             # Best effort: move on to the next node. KeyboardInterrupt and
             # SystemExit are not subclasses of Exception, so they still propagate.
             continue
         return out
     return ''
Ejemplo n.º 11
0
 def get_compaction_history(self):
     """
     Fetch `nodetool compactionhistory` output, trying the nodes in self.ips in order.

     The first node whose rpc call succeeds supplies the result; nodes whose call
     raises are skipped.

     :return: the command output, or '' when self.ips is empty or no node answered.
     """
     history_cmd = 'nodetool compactionhistory'
     for node_ip in self.ips:
         try:
             # Target node, command and credentials are all passed to rpc.
             history, _err = rpc(node_ip, history_cmd, self.username, self.password, self.key)
         except Exception:
             # Best effort per node; KeyboardInterrupt/SystemExit are not caught here.
             continue
         return history
     return ''
Ejemplo n.º 12
0
    def _deliver_payload(self):
        """
        Delivers population scripts and other goodies to the cassandra source cluster. Most stored in ~/.geppetto/

        Per node in self.ips: create ~/.geppetto/common with an __init__.py, then scp
        common.py into it and scp data_population.py plus the schema folder into
        ~/.geppetto/. The remote ~/.geppetto directory is not wiped beforehand.

        :return: True, once self.payload has been set to True.
        """
        base_dir = common.common.global_vars['geppetto_install_dir']
        common_src = '%s/common/common.py' % base_dir
        population_src = '%s/db_utils/cassandra_utils/data_population.py' % base_dir
        schema_src = '%s/db_utils/cassandra_utils/schema' % base_dir

        for node_ip in self.ips:
            report('Updating Geppetto payload on {%s}.' % node_ip)
            destination = '%s@%s:~/.geppetto/' % (self.username, node_ip)

            rpc(node_ip, 'mkdir -p ~/.geppetto/common', self.username, self.password, self.key, suppress_output=True)
            rpc(node_ip, 'touch ~/.geppetto/common/__init__.py', self.username, self.password, self.key, suppress_output=True)

            # (local source, remote target, extra scp keyword arguments), copied in this order.
            uploads = (
                (common_src, '%s/common/' % destination, {}),
                (population_src, destination, {}),
                (schema_src, destination, {'is_dir': True}),
            )
            for local_path, remote_path, extra in uploads:
                scp(local_path, remote_path, self.password, self.key, suppress_output=True, **extra)

        self.payload = True
        return True
Ejemplo n.º 13
0
        def delta_worker():
            """
            Keep re-launching the delta population on workload_ip, once every LOOP_MIN
            minutes, until self.do_delta_population becomes falsy.

            workload_ip, cmd1, cmd2, mgmt_object and LOOP_MIN come from the enclosing scope.
            """
            while True:
                if not self.do_delta_population:
                    return

                # Stop previous populations, in the case they are still going.
                rpc(workload_ip, cmd1, self.username, self.password, self.key)
                time.sleep(2)

                # Start new batch of populations. No tty so we can run as bg & disconnect.
                rpc(workload_ip, cmd2, self.username, self.password, self.key, no_tty=True)
                report('{%s} delta population set on node %s.' % (mgmt_object, workload_ip))

                # Sleep LOOP_MIN min, allow delta to complete and settle, then cycle again.
                settle_seconds = 60 * LOOP_MIN
                time.sleep(settle_seconds)
Ejemplo n.º 14
0
    def clean(self):
        """
        Caution! Empties database directories and commit logs for all nodes in db.

        Sequence: stop cassandra on every node, wait 10 seconds, wipe and restart the
        first node, wait 30 seconds, wipe and restart the remaining nodes, wait 30
        seconds, then report the cluster status.

        :raises ValueError: if data_dir or commitlog_dir is empty or consists only of
                            '/' characters, because 'rm -rf <dir>/*' would then
                            target the filesystem root.
        :return: None
        """
        # Refuse before touching any node: '%s/*' with an empty or '/' directory expands to '/*'.
        for label, directory in (('data_dir', self.data_dir), ('commitlog_dir', self.commitlog_dir)):
            if not ('%s' % (directory,)).strip().strip('/'):
                raise ValueError('Refusing to clean cluster {%s}: %s is %r' % (self.name, label, directory))

        report(
            'Cleaning data and commitlog directories for cluster {%s}' %
            (self.name), 'warning')
        cmd = 'sudo service cassandra stop'
        for ip in self.ips:
            rpc(ip, cmd, self.username, self.password, self.key)

        time.sleep(10)

        cmd_list = [
            'rm -f ~/.__jmxcmd*',
            'sudo rm -rf %s/*' % self.data_dir,
            'sudo rm -rf %s/*' % self.commitlog_dir,
            'sudo service cassandra start',
        ]
        # The first node is wiped and restarted on its own, 30 seconds ahead of the rest
        # (presumably so it is up before the others start -- confirm).
        for ip in self.ips[:1]:
            for cmd in cmd_list:
                rpc(ip, cmd, self.username, self.password, self.key)

        time.sleep(30)

        for ip in self.ips[1:]:
            for cmd in cmd_list:
                rpc(ip, cmd, self.username, self.password, self.key)

        time.sleep(30)
        report('Status cluster {%s} \n %s' % (self.name, self.status()))
Ejemplo n.º 15
0
    def clean(self):
        """
        Caution! Empties database directories and commit logs for all nodes in db.

        Steps: stop cassandra everywhere, pause 10 seconds, wipe and restart the first
        node, pause 30 seconds, wipe and restart every other node, pause 30 seconds,
        then report the cluster status.

        :raises ValueError: if data_dir or commitlog_dir is empty or made up only of
                            '/' characters; 'rm -rf <dir>/*' would otherwise hit the
                            filesystem root.
        :return: None
        """
        # Validate first so nothing is stopped or deleted when a directory is unusable.
        for label, directory in (('data_dir', self.data_dir), ('commitlog_dir', self.commitlog_dir)):
            if not ('%s' % (directory,)).strip().strip('/'):
                raise ValueError('Refusing to clean cluster {%s}: %s is %r' % (self.name, label, directory))

        report('Cleaning data and commitlog directories for cluster {%s}' % (self.name), 'warning')
        cmd = 'sudo service cassandra stop'
        for ip in self.ips:
            rpc(ip, cmd, self.username, self.password, self.key)

        time.sleep(10)

        cmd_list = [
            'rm -f ~/.__jmxcmd*',
            'sudo rm -rf %s/*' % self.data_dir,
            'sudo rm -rf %s/*' % self.commitlog_dir,
            'sudo service cassandra start',
        ]
        # First node alone, then the rest after a 30 second gap
        # (presumably to let the first node come up before the others -- confirm).
        for ip in self.ips[:1]:
            for cmd in cmd_list:
                rpc(ip, cmd, self.username, self.password, self.key)

        time.sleep(30)

        for ip in self.ips[1:]:
            for cmd in cmd_list:
                rpc(ip, cmd, self.username, self.password, self.key)

        time.sleep(30)
        report('Status cluster {%s} \n %s' % (self.name, self.status()))
Ejemplo n.º 16
0
        def mass_worker():
            """
            Start a background mass population on every node in population_ips and,
            unless `async` is truthy, block until the population processes have exited.

            Reads record_count, start_record, population_ips, schema_file, record_size,
            mgmt_object, replication and async from the enclosing scope.
            """
            # Even split; int() truncates, so any remainder of record_count is not assigned to a node.
            record_count_per_node = int(record_count / len(population_ips))
            node_start_record = start_record

            # Optional credentials are handed to the population script on its command line.
            auth_string = ''
            if self.db_user:
                auth_string = '--db_user %s --db_pass %s' % (self.db_user, self.db_pass)

            for ip in population_ips:
                report('Setting mass population on cluster {%s} node {%s}.' % (self.name, ip), 'warning')

                # Clean log first.
                cmd = 'sudo rm /tmp/mass_population.log'
                rpc(ip, cmd, self.username, self.password, self.key)

                # Insert command for this node; backgrounded with '&', output redirected to the log removed above.
                cmd = '(python ~/.geppetto/data_population.py ' \
                      '%s %s %s ' \
                      'insert ' \
                      '-r %s ' \
                      '-s %s ' \
                      '-n %s ' \
                      '-t %s ' \
                      '--replication %s ' \
                      ') > /tmp/mass_population.log &' % \
                      (ip, schema_file, auth_string,
                       record_size,
                       node_start_record,
                       record_count_per_node,
                       mgmt_object,
                       replication)

                # The next node starts where this node's record range ends.
                node_start_record += record_count_per_node

                rpc(ip, cmd, self.username, self.password, self.key, no_tty=True)  # No tty so we can run as bg & disconnect.

            # NOTE(review): `async` is a reserved keyword from Python 3.7 on, so this line only parses on older interpreters.
            if not async:
                # Counts processes whose `ps -ef` line mentions 'geppetto'.
                cmd = 'ps -ef | grep geppetto | grep -v grep | wc -l'
                # Last line of the population log, echoed as a progress indicator.
                cmd2 = 'tail -1 /tmp/mass_population.log'
                while True:
                    try:
                        report('Populating ...')

                        processes_running = 0
                        for ip in population_ips:
                            out, err = rpc(ip, cmd, self.username, self.password, self.key, suppress_output=True)
                            out2, err2 = rpc(ip, cmd2, self.username, self.password, self.key, suppress_output=True)
                            report('<%s> %s' % (ip, out2))
                            try:
                                processes_running += int(out)
                            except Exception as e:
                                # Process count was not an integer: report it, then re-raise so the outer handler stops polling.
                                report(e, 'critical')
                                raise
                        # Finished once no node has a matching process left.
                        if processes_running == 0:
                            break
                    except Exception as e:
                        # Any failure while polling ends the wait (an int() failure is reported a second time here).
                        report(e, 'critical')
                        break

                    # Pause between polling rounds.
                    time.sleep(15)
Ejemplo n.º 17
0
    def db_remove_random_node(self, wait_time_min=0, run_time_min=10, randomness_time_injection=0):
        """
        Removes node from cassandra cluster for given time.

        After the initial waits, a randomly chosen node is sent a background command
        that brings eth0 down, sleeps for the outage time, brings eth0 back up and
        reboots. The node at the next index (wrapping around) is then asked to run
        `nodetool removenode` for the chosen ip.

        :param wait_time_min: minutes to sleep before acting; must be >= 0.
        :param run_time_min: intended outage length in minutes; must be >= 0.
        :param randomness_time_injection: upper bound, in seconds, of an extra random sleep before acting; must be >= 0.
        :return: None
        """
        assert wait_time_min >= 0
        assert run_time_min >= 0
        assert randomness_time_injection >= 0

        time.sleep(wait_time_min * 60)
        time.sleep(random.randint(0, randomness_time_injection))

        cluster = self.cassandra
        chosen_index = pick_x_different_num(1, 0, len(cluster.ips) - 1)[0]
        target_ip = cluster.ips[chosen_index]
        # We subtract about 45 seconds due to reboot time at the end; the result is never negative.
        outage_seconds = int(max(0, run_time_min * 60 - 45))

        add_test_note('Chose {%s} node to be removed for %s seconds.' % (target_ip, outage_seconds))

        # Disable eth0, then reboot at the end to simulate failure; the command is backgrounded on the node.
        failure_cmd = '(nohup sudo ifdown eth0; sleep %s ; sudo ifup eth0 ; sudo reboot now) > /tmp/datos_failure.log &' % (
            outage_seconds)
        rpc(target_ip, failure_cmd, cluster.username, cluster.password, cluster.key)

        # Ask the neighbouring node to drop the chosen node from the ring.
        neighbour_ip = cluster.ips[(chosen_index + 1) % len(cluster.ips)]
        removal_cmd = 'nodetool removenode %s' % target_ip
        rpc(neighbour_ip, removal_cmd, cluster.username, cluster.password, cluster.key)
Ejemplo n.º 18
0
 def nodetool_status(self):
     """
     Query the nodes for `nodetool status` and return the first usable output.

     Every ip in self.source_params['ips'] is asked in order. grep on the remote
     side keeps only lines containing one of the two-letter state codes (UN, UL, UJ,
     UM, DN, DL, DJ, DM) or '===='. When no node returns a state code the operator
     is prompted, and the answer 'r' starts another sweep.

     :return: the filtered nodetool output, or None when nothing was received and
              no retry was requested.
     """
     # Raw string so that '\|' reaches grep untouched and is not read as a Python escape.
     status_cmd = r"nodetool status | grep 'UN\|UL\|UJ\|UM\|DN\|DL\|DJ\|DM\|===='"
     state_codes = ('UN', 'UL', 'UJ', 'UM', 'DN', 'DL', 'DJ', 'DM')

     # Cycle through the nodes until we get a result from nodetool status.
     for ip in self.source_params['ips']:
         out, _err = rpc(
             ip,
             status_cmd,
             self.username,
             self.password,
             self.key,
             suppress_output=True)
         if any(code in out for code in state_codes):
             return out

     response = pause_execution_for_input(
         'No status received from Cassandra Nodetool', level='info')
     if response == 'r':
         # Return what the retry produced so the caller actually receives it.
         return self.nodetool_status()
     return None
Ejemplo n.º 19
0
    def single_random_node_failure(self,
                                   wait_time_min=0,
                                   run_time_min=10,
                                   time_length_of_failure=5,
                                   max_failure_repeats=1,
                                   randomness_time_injection=90):
        """
        Shuts down a random cassandra node for a given time with some randomness thrown in for timing.

        One node is picked once; it is then cut off from the network (eth0 down) up to
        max_failure_repeats times, each outage lasting time_length_of_failure minutes.

        :param wait_time_min: minutes to sleep before the first outage; must be >= 0.
        :param run_time_min: the loop stops once the elapsed time plus wait_time_min
                             reaches this many minutes; must be >= 0.1.
        :param time_length_of_failure: minutes the node stays off the network; must be >= 0.2.
        :param max_failure_repeats: maximum number of outages; must be > 0.
        :param randomness_time_injection: upper bound, in seconds, of the random sleep before each outage; must be >= 0.
        :raises KeyboardInterrupt, SystemExit: re-raised after the downed node has been
                                               restored and the test marked 'Aborted'.
        """
        global global_vars

        assert (wait_time_min >= 0)
        assert (run_time_min >= 0.1)
        assert (time_length_of_failure >= 0.2)
        assert (max_failure_repeats > 0)
        assert (randomness_time_injection >= 0)

        self.cassandra.status()

        start_time = time.time()
        time.sleep(60 * wait_time_min)

        pick = pick_x_different_num(1, 0, len(self.cassandra.ips) - 1)[0]
        for _ in xrange(max_failure_repeats):
            time.sleep(random.randint(
                0, randomness_time_injection))  # Randomize time that db fails.
            currently_down = collections.deque()

            try:
                # Bring down the db node for some time.
                currently_down.append(pick)

                try:
                    # NOTE(review): the test note is an empty string here -- confirm whether a message was intended.
                    note = ''
                    add_test_note(note)
                    rpc(
                        self.cassandra.ips[pick],
                        '(nohup sudo ifdown eth0; sleep %s ; sudo ifup eth0 ; ) > /tmp/datos_failure.log &'
                        % (time_length_of_failure * 60),
                        self.cassandra.username, self.cassandra.password,
                        self.cassandra.key)
                except Exception:
                    # Only ordinary errors count as "could not connect"; KeyboardInterrupt and
                    # SystemExit must reach the cleanup handler below.
                    report('Could not connect to node {%s}.' % self.cassandra.ips[pick],
                           'warning')

                self.cassandra.status()  # Let's see the db state.
                time.sleep(60 * time_length_of_failure + 60)

                # Bring db node back up.
                self.cassandra.node_restore(
                    self.cassandra.ips[pick]
                )  # Currently we don't have good way to restore so this does nothing.
                currently_down.popleft()
                time.sleep(20)
                self.cassandra.status()

            except (KeyboardInterrupt, SystemExit) as e:
                # Do some clean up (restore db nodes) and some reporting, then re-raise exception.
                report('Exit detected ... restoring db state', 'critical')
                for down_index in currently_down:
                    self.cassandra.node_restore(self.cassandra.ips[down_index])
                time.sleep(20)
                self.cassandra.status()  # Logs will capture output.
                global_vars['test_status'] = 'Aborted'
                add_test_note(e)
                raise  # Bare raise keeps the original traceback.

            # Exit failure loop if we've reached max time.
            if (time.time() + wait_time_min * 60 - start_time >=
                    run_time_min * 60):
                break
Ejemplo n.º 20
0
 def stop_mass_population(self):
     """
     Clear the do_mass_population flag, then on each node in self.ips send kill -9 to
     every process whose `ps -ef` line mentions 'geppetto'.
     """
     self.do_mass_population = False
     kill_cmd = "ps -ef | grep -v grep | grep geppetto | awk '{print $2}' | xargs kill -9"
     for node_ip in self.ips:
         rpc(node_ip, kill_cmd, self.username, self.password, self.key)
Ejemplo n.º 21
0
        def mass_worker():
            """
            Launch a background mass population on each node in population_ips and,
            unless `async` is truthy, wait until the population processes are gone.

            Reads record_count, start_record, population_ips, schema_file, record_size,
            mgmt_object, replication and async from the enclosing scope.
            """
            # Even split; int() truncates, so any remainder of record_count is not assigned to a node.
            record_count_per_node = int(record_count / len(population_ips))
            node_start_record = start_record

            # Optional credentials are handed to the population script on its command line.
            auth_string = ''
            if self.db_user:
                auth_string = '--db_user %s --db_pass %s' % (self.db_user,
                                                             self.db_pass)

            for ip in population_ips:
                report(
                    'Setting mass population on cluster {%s} node {%s}.' %
                    (self.name, ip), 'warning')

                # Clean log first.
                cmd = 'sudo rm /tmp/mass_population.log'
                rpc(ip, cmd, self.username, self.password, self.key)

                # Insert command for this node; backgrounded with '&', output redirected to the log removed above.
                cmd = '(python ~/.geppetto/data_population.py ' \
                      '%s %s %s ' \
                      'insert ' \
                      '-r %s ' \
                      '-s %s ' \
                      '-n %s ' \
                      '-t %s ' \
                      '--replication %s ' \
                      ') > /tmp/mass_population.log &' % \
                      (ip, schema_file, auth_string,
                       record_size,
                       node_start_record,
                       record_count_per_node,
                       mgmt_object,
                       replication)

                # The next node starts where this node's record range ends.
                node_start_record += record_count_per_node

                rpc(ip,
                    cmd,
                    self.username,
                    self.password,
                    self.key,
                    no_tty=True)  # No tty so we can run as bg & disconnect.

            # NOTE(review): `async` is a reserved keyword from Python 3.7 on, so this line only parses on older interpreters.
            if not async:
                # Counts processes whose `ps -ef` line mentions 'geppetto'.
                cmd = 'ps -ef | grep geppetto | grep -v grep | wc -l'
                # Last line of the population log, echoed as a progress indicator.
                cmd2 = 'tail -1 /tmp/mass_population.log'
                while True:
                    try:
                        report('Populating ...')

                        processes_running = 0
                        for ip in population_ips:
                            out, err = rpc(ip,
                                           cmd,
                                           self.username,
                                           self.password,
                                           self.key,
                                           suppress_output=True)
                            out2, err2 = rpc(ip,
                                             cmd2,
                                             self.username,
                                             self.password,
                                             self.key,
                                             suppress_output=True)
                            report('<%s> %s' % (ip, out2))
                            try:
                                processes_running += int(out)
                            except Exception as e:
                                # Process count was not an integer: report it, then re-raise so the outer handler stops polling.
                                report(e, 'critical')
                                raise
                        # Finished once no node has a matching process left.
                        if processes_running == 0:
                            break
                    except Exception as e:
                        # Any failure while polling ends the wait (an int() failure is reported a second time here).
                        report(e, 'critical')
                        break

                    # Pause between polling rounds.
                    time.sleep(15)
Ejemplo n.º 22
0
 def cfstats(self):
     """
     Run `nodetool cfstats` on the first node of self.ips and return its output.

     The loop body returns unconditionally, so later nodes are never contacted.
     With an empty self.ips nothing is run and None is returned.
     """
     stats_cmd = "nodetool cfstats"
     for node_ip in self.ips:
         stats_output, _ignored = rpc(node_ip, stats_cmd, self.username, self.password, self.key)
         # TODO: (Aaron) finish ...
         return stats_output
     return None
Ejemplo n.º 23
0
 def db_start(self, ip):
     """
     Run 'sudo service cassandra start' on the node at `ip` through rpc.

     :param ip: address of the node whose cassandra service is started.
     :return: None; rpc's return value is not used.
     """
     credentials = (self.username, self.password, self.key)
     rpc(ip, 'sudo service cassandra start', *credentials, timeout=120)  # 120 == 60*2
Ejemplo n.º 24
0
 def node_reboot(self, ip):
     """
     Reboot the node at `ip` by running 'sudo reboot now' through rpc.

     :param ip: address of the node to reboot.
     :return: None; rpc's return value is not used.
     """
     credentials = (self.username, self.password, self.key)
     rpc(ip, 'sudo reboot now', *credentials, timeout=120)  # 120 == 60*2
Ejemplo n.º 25
0
 def node_shutdown(self, ip):
     """
     Halt the node at `ip` by running 'sudo halt' through rpc.

     :param ip: address of the node to halt.
     :return: None; rpc's return value is not used.
     """
     credentials = (self.username, self.password, self.key)
     rpc(ip, 'sudo halt', *credentials, timeout=120)  # 120 == 60*2
Ejemplo n.º 26
0
 def shutdown(self):
     """
     Shutdown the whole db cluster.

     Each ip in self.ips receives 'sudo service cassandra stop' through rpc, in order.
     """
     stop_cmd = 'sudo service cassandra stop'
     stop_timeout = 2 * 60  # Same value as 60*2; presumably seconds -- confirm against rpc.
     for node_ip in self.ips:
         rpc(node_ip, stop_cmd, self.username, self.password, self.key, timeout=stop_timeout)
Ejemplo n.º 27
0
 def stop_mass_population(self):
     """
     Set do_mass_population to False and, on every node in self.ips, kill -9 all
     processes whose `ps -ef` line mentions 'geppetto'.
     """
     self.do_mass_population = False
     kill_cmd = "ps -ef | grep -v grep | grep geppetto | awk '{print $2}' | xargs kill -9"
     for node_ip in self.ips:
         rpc(node_ip, kill_cmd, self.username, self.password, self.key)