def run_batch(self, batch_jobs, batch_inputs):
        batch_id = self.coordinator_db.next_batch_id

        log.info("Running batch %d with the following job(s): %s" %
                 (batch_id, ', '.join(map(str, batch_jobs))))

        # Create log directory for the current batch
        batch_logs = create_batch_directory(self.log_directory, batch_id)

        # Copy description files to the log directory
        description_dir = os.path.join(
            os.path.dirname(__file__), os.pardir, os.pardir, os.pardir,
            "tritonsort", "mapreduce", "description")
        shutil.copy(os.path.join(description_dir, "stages.json"), batch_logs)
        shutil.copy(os.path.join(description_dir, "structure.json"), batch_logs)

        # Copy config file to log directory
        shutil.copy(self.config_file, batch_logs)

        self.ready_for_next_batch = False

        # Pull out relevant phase zero parameters
        phase_zero_sample_rate = 1 # Sample 100% by default
        if "SAMPLE_RATE" in self.config:
            phase_zero_sample_rate = float(self.config["SAMPLE_RATE"])
        phase_zero_sample_points_per_file = 1 # Sample prefixes by default
        if "SAMPLES_PER_FILE" in self.config:
            phase_zero_sample_points_per_file = \
                int(self.config["SAMPLES_PER_FILE"])
        fixed_key_length = None
        if "MAP_INPUT_FIXED_KEY_LENGTH" in self.config:
            fixed_key_length = int(self.config["MAP_INPUT_FIXED_KEY_LENGTH"])
        fixed_value_length = None
        if "MAP_INPUT_FIXED_VALUE_LENGTH" in self.config:
            fixed_value_length = \
                int(self.config["MAP_INPUT_FIXED_VALUE_LENGTH"])

        # If the application config file (yaml) or the job spec file (json)
        # skips a phase, we should not load read requests for that phase. The
        # job spec file should override the application config file.
        skip_phase_zero = 0
        skip_phase_one = 0
        skip_phase_two = 0
        skip_phase_three = 0
        if "SKIP_PHASE_ZERO" in self.config and self.config["SKIP_PHASE_ZERO"]:
            skip_phase_zero = 1
        if "SKIP_PHASE_ONE" in self.config and self.config["SKIP_PHASE_ONE"]:
            skip_phase_one = 1
        if "SKIP_PHASE_TWO" in self.config and self.config["SKIP_PHASE_TWO"]:
            skip_phase_two = 1
        if "SKIP_PHASE_THREE" in self.config and \
                self.config["SKIP_PHASE_THREE"]:
            skip_phase_three = 1

        # The run_job.py script verifies that all jobs in the batch have the
        # same value of these skip parameters in the job specs, so we can just
        # check the first one.
        for key, value in (
            self.coordinator_db.job_params(batch_jobs[0]).items()):
            if key == "SKIP_PHASE_ZERO":
                skip_phase_zero = value
            if key == "SKIP_PHASE_ONE":
                skip_phase_one = value
            if key == "SKIP_PHASE_TWO":
                skip_phase_two = value
            if key == "SKIP_PHASE_THREE":
                skip_phase_three = value
            if key == "MAP_INPUT_FIXED_KEY_LENGTH":
                fixed_key_length = int(value)
            if key == "MAP_INPUT_FIXED_VALUE_LENGTH":
                fixed_value_length = int(value)

        fixed_tuple_length = None
        if fixed_key_length is not None and fixed_value_length is not None:
            fixed_tuple_length = fixed_key_length + fixed_value_length

        use_replication = False
        if "OUTPUT_REPLICATION_LEVEL" in self.config and \
                int(self.config["OUTPUT_REPLICATION_LEVEL"]) > 1:
            use_replication = True

        phases = []
        if not skip_phase_zero:
            phases.append(0)
        if not skip_phase_one:
            phases.append(1)
        if not skip_phase_two and use_replication:
            # If we're using replication, phase two will have network transfer,
            # so use barriers to guarantee sockets are connected.
            phases.append(2)
        if not skip_phase_three and use_replication:
            # If we're using replication, phase three will have network
            # transfer, so use barriers to guarantee sockets are connected.
            phases.append(3)

        # Setup barriers
        self.coordinator_db.create_barriers(phases, batch_id, batch_jobs)

        # Generate read requests for the jobs in the batch
        read_requests = generate_read_requests(
            job_inputs=batch_inputs,
            phase_zero_sample_rate=phase_zero_sample_rate,
            phase_zero_sample_points_per_file=(
                phase_zero_sample_points_per_file),
            tuple_start_offset=fixed_tuple_length,
            job_ids=batch_jobs, phases=phases)

        # Load read requests into read request queue for each worker
        load_read_requests(self.coordinator_db, read_requests)

        start_time = time.time()
        # Mark phase zero as starting now.
        self.coordinator_db.begin_phase(batch_id, "phase_zero")
        self.batch_phase_info[batch_id] = ("phase_zero", 0, start_time)
        log.info("Running phase_zero...")
        print_keyboard_commands()

        for job_id in batch_jobs:
            self.coordinator_db.update_job_status(
                job_id, { "start_time" : str(start_time),
                          "batch_id" : batch_id,
                          "date" : time.asctime()})

        self.coordinator_db.add_jobs_to_batch(batch_id, batch_jobs)

        self.coordinator_db.mark_batch_incomplete(batch_id)

        # Setting current_batch will cause all node coordinators to start work
        # on that batch
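        # (each node coordinator picks the batch up in its run() loop below
        # via coordinator_db.blocking_wait_for_next_batch)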
        self.coordinator_db.add_batch_to_node_coordinator_batch_queues(batch_id)
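
A minimal standalone sketch of the phase-selection logic above (a hypothetical
helper, not part of the original code): barriers are only created for phases
that will actually run, and phases two and three only need them when output
replication forces network transfer.

def select_barrier_phases(skip_flags, use_replication):
    """Return the phase numbers that require coordination barriers.

    skip_flags maps "SKIP_PHASE_ZERO" .. "SKIP_PHASE_THREE" to truthy values
    for phases that should be skipped entirely.
    """
    phases = []
    if not skip_flags.get("SKIP_PHASE_ZERO"):
        phases.append(0)
    if not skip_flags.get("SKIP_PHASE_ONE"):
        phases.append(1)
    # Phases two and three only do network transfer (and therefore only need
    # barriers to guarantee sockets are connected) when replication is on.
    if not skip_flags.get("SKIP_PHASE_TWO") and use_replication:
        phases.append(2)
    if not skip_flags.get("SKIP_PHASE_THREE") and use_replication:
        phases.append(3)
    return phases

# Example: nothing skipped, no replication -> only phases 0 and 1 get barriers
print(select_barrier_phases({}, use_replication=False))   # [0, 1]
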
Example #2
    def run(self):
        # Any pending batches won't be processed by this client
        self.coordinator_db.clear_batch_queue(self.hostname)

        remaining_live_retries = 10

        # Make sure the entire cluster is ping-able
        nodes = list(self.coordinator_db.live_nodes)
        self.coordinator_db.wait_for_ping_request(self.hostname)
        # Issue fping command to the entire cluster
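        # fping behaves like a plumbum-style bound command here: each [...]
        # index appends an argument, so this builds "fping -u <node> <node>
        # ...", and calling command() runs it and returns the hosts that
        # fping -u reports as unreachable.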
        log.info("Pinging %s" % nodes)
        command = fping["-u"]
        for node in nodes:
            command = command[node]
        unreachable_nodes = command()
        unreachable_nodes = unreachable_nodes.encode("ascii")
        log.info("Unreachable nodes: %s" % unreachable_nodes)
        # Report results to the cluster coordinator
        self.coordinator_db.send_ping_reply(self.hostname, unreachable_nodes)

        while True:
            # Re-grab my node ID, the list of nodes, and the number of
            # intermediate disks on each node
            nodes = list(self.coordinator_db.live_nodes)
            nodes.sort()

            try:
                node_id = nodes.index(self.hostname)
                remaining_live_retries = 10

            except ValueError:
                error_message = (
                    ("Can't find my hostname (%s) in the list of valid "
                     "nodes") % (self.hostname))
                log.error(error_message)

                # Sleep for a little while and try again
                remaining_live_retries -= 1

                if remaining_live_retries == 0:
                    raise RuntimeError(error_message)
                else:
                    time.sleep(1)

                continue

            intermediate_disk_counts = []

            for node in nodes:
                intermediate_disk_counts.append(
                    len(self.coordinator_db.local_disks(node)))

            # Make sure we have the same number of intermediate disks on each
            # node.
            if len(set(intermediate_disk_counts)) != 1:
                error_message = (
                    ("All nodes should have the same number of intermediate "
                     "disks, but counts are %s") % (intermediate_disk_counts))
                log.error(error_message)
                raise RuntimeError(error_message)
            num_intermediate_disks = intermediate_disk_counts[0]

            node_ips = [self.coordinator_db.ipv4_address(node)
                        for node in nodes]

            log.info("Node IPs: %s" % (node_ips))
            log.info("My node ID: %d" % (node_id))

            self.ip_address = node_ips[node_id]

            # Get IPs for all interfaces
            node_interface_ips = [
                self.coordinator_db.interfaces(node) for node in nodes]

            intermediate_disks = self.coordinator_db.local_disks(self.hostname)

            # If we're writing output to local disks, we need to know what
            # those local disks are
            output_disks = self.coordinator_db.io_disks(self.hostname)

            # Get the next batch number from the coordinator
            log.info("Waiting for the next batch ...")
            self.current_batch = (
                self.coordinator_db.blocking_wait_for_next_batch(
                    self.hostname))

            log.info("Running batch %d" % (self.current_batch))

            # Make a temporary directory to hold logical disk counts and
            # partition information; put a nonce in the directory name to avoid
            # collisions. Store it on this node's first intermediate disk to
            # avoid running into /tmp size limits

            tmp_files_dir = os.path.join(
                intermediate_disks[0],
                "%(username)s_tempfiles_batch_%(batch_number)d_%(nonce)x" % {
                    "username": self.username,
                    "batch_number": self.current_batch,
                    "nonce": self.batch_nonce
                })

            assert not os.path.exists(tmp_files_dir)

            os.makedirs(tmp_files_dir)

            # Construct log directory based on current batch
            base_log_dir = create_batch_directory(self.log_directory,
                                                  self.current_batch)

            batch_jobs = self.coordinator_db.batch_jobs(self.current_batch)
            # Determine which phases we're running based on the app config and
            # the first job's job spec
            job_params = self.coordinator_db.job_params(batch_jobs[0])
            skip_params = [
                "SKIP_PHASE_ZERO", "SKIP_PHASE_ONE", "SKIP_PHASE_TWO",
                "SKIP_PHASE_THREE"
            ]
            skipped_phases = {}
            for param in skip_params:
                # By default don't skip the phase
                skipped_phases[param] = False
                # First load app config
                if param in self.config:
                    skipped_phases[param] = self.config[param]
                # Then load job spec
                if param in job_params:
                    skipped_phases[param] = job_params[param]

            # Special case for daytona minutesort
            daytona_minutesort = False
            if "DAYTONA_MINUTESORT" in job_params and \
                    job_params["DAYTONA_MINUTESORT"]:
                daytona_minutesort = True
                skipped_phases["SKIP_PHASE_ZERO"] = False
                skipped_phases["SKIP_PHASE_ONE"] = True
                skipped_phases["SKIP_PHASE_TWO"] = True
                skipped_phases["SKIP_PHASE_THREE"] = True

            # Need to make a disk-backed boundary list file for each job in the
            # batch, and retrieve any boundary list files for jobs that those
            # jobs are recovering

            global_boundary_list_files = self.lookup_global_boundary_lists(
                batch_jobs, base_log_dir)

            if isinstance(global_boundary_list_files, int):
                # There was some sort of error while grabbing the boundary
                # file for the returned job ID; abort this batch
                self.fail_current_batch(
                    "Couldn't fetch global boundary list files for job %d" %
                    (global_boundary_list_files))
                self.coordinator_db.node_completed_batch(
                    self.hostname, self.current_batch)
                continue

            # If any part of the batch fails, we should skip all subsequent
            # parts, but still clean up appropriately
            continue_batch = True
            logical_disk_counts_files = {}
            boundary_list_files = {}

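            # Base parameters shared by every Themis invocation in this batch.
            # Phases one through three are skipped by default; each per-phase
            # block below flips the SKIP_PHASE_* flags so that a single
            # invocation runs exactly one phase.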
            command_params = {
                "OUTPUT_DISK_LIST": ','.join(output_disks),
                "MYPEERID": node_id,
                "MY_IP_ADDRESS": self.ip_address,
                "PEER_LIST": ','.join(node_interface_ips),
                "NUM_INTERFACES": self.num_interfaces,
                "CONFIG": self.config_file,
                "DEFAULT_CONFIG": self.default_config,
                "SKIP_PHASE_ONE": 1,
                "SKIP_PHASE_TWO": 1,
                "SKIP_PHASE_THREE": 1,
                "COORDINATOR.HOSTNAME": self.redis_host,
                "COORDINATOR.PORT": self.redis_port,
                "COORDINATOR.DB": self.redis_db,
                "BATCH_ID": str(self.current_batch),
                "NUM_INPUT_DISKS":
                len(self.coordinator_db.io_disks(self.hostname))
            }

            if skipped_phases["SKIP_PHASE_ZERO"] == False:
                # Execute phase zero for each job in the batch
                for job_id in batch_jobs:

                    if not continue_batch:
                        break

                    phase_zero_log_dir = os.path.join(
                        base_log_dir, "phase_zero_job_%d" % (job_id))

                    logical_disk_counts_file = os.path.join(
                        tmp_files_dir, "logical_disk_counts.%d" % (job_id))
                    logical_disk_counts_files[
                        job_id] = logical_disk_counts_file

                    boundary_list_file = os.path.join(
                        tmp_files_dir, "boundary_list.%d" % (job_id))
                    boundary_list_files[job_id] = boundary_list_file

                    command_params["LOG_DIR"] = phase_zero_log_dir
                    command_params[
                        "LOGICAL_DISK_COUNTS_FILE"] = logical_disk_counts_file
                    command_params["BOUNDARY_LIST_FILE"] = boundary_list_file
                    command_params["JOB_IDS"] = str(job_id)

                    # Use distinct loop variables below so we don't clobber
                    # job_id, which still refers to the job currently being
                    # run from this batch.
                    for boundary_job_id in global_boundary_list_files:
                        param_name = ("DISK_BACKED_BOUNDARY_LIST.%d" %
                                      (boundary_job_id))

                        command_params[param_name] = (
                            global_boundary_list_files[boundary_job_id])

                    if daytona_minutesort:
                        for bl_job_id, filename in (
                                boundary_list_files.items()):
                            command_params[
                                "BOUNDARY_LIST_FILE.%d" % (bl_job_id)] = \
                                filename

                    # Pull in any parameters that may have been set for this job,
                    # overriding the parameters set above
                    for key, value in (
                            self.coordinator_db.job_params(job_id).items()):

                        command_params[key] = value

                    continue_batch = self._run_themis(self.themis_binary,
                                                      command_params,
                                                      phase_zero_log_dir)

                    # Copy one of the logical disk counts file to a well-known
                    # location

                    if continue_batch and node_id == 0:
                        if os.path.exists(logical_disk_counts_file):
                            shutil.copy(
                                logical_disk_counts_file,
                                os.path.join(
                                    phase_zero_log_dir,
                                    os.path.basename(
                                        logical_disk_counts_file)))
                        else:
                            log.error(
                                "Can't find logical disk counts file '%s'" %
                                (logical_disk_counts_file))

            # Notify redis that we're done with phase zero
            self.coordinator_db.phase_completed(self.current_batch,
                                                self.ip_address, "phase_zero")

            if skipped_phases["SKIP_PHASE_ONE"] == False:
                # Execute phase one with all jobs at once
                if continue_batch:
                    phase_one_log_dir = os.path.join(base_log_dir, "phase_one")

                    if "BOUNDARY_LIST_FILE" in command_params:
                        del command_params["BOUNDARY_LIST_FILE"]
                    if "LOGICAL_DISK_COUNTS_FILE" in command_params:
                        del command_params["LOGICAL_DISK_COUNTS_FILE"]
                    if "SKIP_PHASE_ONE" in command_params:
                        del command_params["SKIP_PHASE_ONE"]
                    command_params["SKIP_PHASE_ZERO"] = 1
                    command_params["SKIP_PHASE_TWO"] = 1
                    command_params["SKIP_PHASE_THREE"] = 1
                    command_params["JOB_IDS"] = ','.join(map(str, batch_jobs))

                    command_params["LOG_DIR"] = phase_one_log_dir

                    for job_id, filename in logical_disk_counts_files.items():
                        command_params["LOGICAL_DISK_COUNTS_FILE.%d" % (job_id)] = \
                            filename

                    for job_id, filename in boundary_list_files.items():
                        command_params["BOUNDARY_LIST_FILE.%d" % (job_id)] = \
                            filename

                    for job_id in batch_jobs:
                        # Pull in any parameters that may have been set for this job,
                        # overriding the parameters set above
                        # TODO(MC): This doesn't work for multiple jobs.
                        for key, value in (self.coordinator_db.job_params(
                                job_id).items()):

                            command_params[key] = value

                    continue_batch = self._run_themis(self.themis_binary,
                                                      command_params,
                                                      phase_one_log_dir)

            # Notify redis that we're done with phase one
            self.coordinator_db.phase_completed(self.current_batch,
                                                self.ip_address, "phase_one")

            if skipped_phases["SKIP_PHASE_TWO"] == False:
                # Execute phase two with all jobs at once
                if continue_batch:
                    phase_two_log_dir = os.path.join(base_log_dir, "phase_two")

                    if "SKIP_PHASE_TWO" in command_params:
                        del command_params["SKIP_PHASE_TWO"]
                    command_params["SKIP_PHASE_ZERO"] = 1
                    command_params["SKIP_PHASE_ONE"] = 1
                    command_params["SKIP_PHASE_THREE"] = 1
                    command_params["LOG_DIR"] = phase_two_log_dir

                    # Execute phase two
                    continue_batch = self._run_themis(
                        self.themis_binary + "_phase_two", command_params,
                        phase_two_log_dir)

            # Notify redis that we're done with phase two
            self.coordinator_db.phase_completed(self.current_batch,
                                                self.ip_address, "phase_two")

            if skipped_phases["SKIP_PHASE_THREE"] == False:
                # Execute phase three for each job in the batch
                for job_id in batch_jobs:
                    if not continue_batch:
                        break

                    phase_three_log_dir = os.path.join(
                        base_log_dir, "phase_three_job_%d" % (job_id))

                    if "SKIP_PHASE_THREE" in command_params:
                        del command_params["SKIP_PHASE_THREE"]
                    command_params["SKIP_PHASE_ZERO"] = 1
                    command_params["SKIP_PHASE_ONE"] = 1
                    command_params["SKIP_PHASE_TWO"] = 1
                    command_params["LOG_DIR"] = phase_three_log_dir

                    # Execute phase three
                    continue_batch = self._run_themis(self.themis_binary,
                                                      command_params,
                                                      phase_three_log_dir)

            # Notify redis that we're done with phase three
            self.coordinator_db.phase_completed(self.current_batch,
                                                self.ip_address, "phase_three")

            if continue_batch:
                log.info("Batch %d succeeded" % (self.current_batch))
            else:
                log.info("Batch %d failed" % (self.current_batch))

            # Done processing this batch
            self.coordinator_db.node_completed_batch(self.hostname,
                                                     self.current_batch)
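
The per-phase blocks above all follow the same pattern: clear the
SKIP_PHASE_* flag for the phase being run, set the other three, and point
LOG_DIR at a phase-specific log directory. A hedged sketch of that pattern as
a standalone helper (hypothetical name, not part of the original code):

def params_for_phase(base_params, phase_name, log_dir):
    """Return a copy of base_params configured to run exactly one phase.

    phase_name is one of "ZERO", "ONE", "TWO" or "THREE"; every other phase
    is skipped, mirroring how run() flips the SKIP_PHASE_* flags between
    Themis invocations.
    """
    params = dict(base_params)
    for phase in ("ZERO", "ONE", "TWO", "THREE"):
        key = "SKIP_PHASE_%s" % phase
        if phase == phase_name:
            # Run this phase: make sure its skip flag is absent.
            params.pop(key, None)
        else:
            params[key] = 1
    params["LOG_DIR"] = log_dir
    return params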