Example 1
class UnifBFBSimulation(Simulation):
    ##
    # __init__() from Simulation
    #

    ##
    # Initialize UnifBFBSimulation
    #
    def init(self):
        self.logger = logging.getLogger(__name__)
        # self.logger.setLevel(logging.ERROR)
        self.logger.setLevel(logging.INFO)
        # self.logger.setLevel(logging.DEBUG)
        self.logger.addHandler(console)
        self.logger.propagate = False

        # Failure biasing prob
        self.fb_prob = float(self.is_parms.fb_prob)
        # Arrival rate of homogeneous Poisson process, beta
        self.poisson_rate = float(self.is_parms.beta)
        # Likelihood ratio
        self.lr = float(1.)

        self.logger.debug(
            "UnifBFBSimulation init() - fb_prob = %.6f, poisson_rate = %.6f",
            self.fb_prob, self.poisson_rate)

    ##
    # Reset the simulator
    #
    def reset(self):
        # Reset clocks and state for each disk
        for disk in self.disks:
            disk.init_clock(0)
            disk.init_state()

        # Reset clocks and state for each node
        for node in self.nodes:
            node.init_clock(0)
            node.init_state()

        # Reset clocks and state for each rack
        for rack in self.racks:
            rack.init_state()

        # Reset system state
        self.state = State(self.num_disks, self.num_nodes)

        # Reset repair queue
        self.repair_queue = []

        # Regenerate new placement
        self.placement = Placement(self.num_racks, self.nodes_per_rack,
                                   self.disks_per_node, self.capacity_per_disk,
                                   self.num_stripes, self.chunk_size,
                                   self.code_type, self.n, self.k,
                                   self.place_type, self.chunk_rack_config,
                                   self.l)
        # Reset LR
        self.lr = float(1.)

        self.total_failure_rate = 0.
        self.total_failure_rate_cnt = 0
        self.total_repair_rate = 0.
        self.total_repair_rate_cnt = 0

    ##
    # Get failure rate
    #
    def get_failure_rate(self):
        fail_rate = float(0)

        for disk in self.disks:
            fail_rate += disk.curr_disk_fail_rate()

        for node in self.nodes:
            fail_rate += node.curr_node_fail_rate()

        # self.logger.debug("get_failure_rate(): fail_rate = %.6f", fail_rate)
        # print("get_failure_rate(): fail_rate = %.6f" % fail_rate)
        return fail_rate

    ##
    # Get the probability of node failure
    # To decide whether a failure event is node failure or disk failure
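    # node failure probability = total node failure rate /
    #                            (total node failure rate + total disk failure rate)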
    #
    def get_node_failure_prob(self):
        comp_fail_rate = float(0)
        node_fail_rate = float(0)
        for disk in self.disks:
            comp_fail_rate += disk.curr_disk_fail_rate()
        for node in self.nodes:
            node_fail_rate += node.curr_node_fail_rate()

        return node_fail_rate / (node_fail_rate + comp_fail_rate)

    ##
    # Calculate the repair time for a failed component
    # The repair time = the amount of cross-rack data to download / cross-rack bandwidth
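    # For RS, each affected stripe contributes k minus the number of alive
    # chunks already in the failed disk's rack; LRC and DRC need less traffic
    # for single-chunk repairs. Illustrative figures (not from the original
    # source): if 1000 stripes each need 2 cross-rack chunks,
    # repair_duration = 2000 * chunk_size / avail_cross_rack_bwth seconds,
    # assuming chunk_size and the bandwidth use consistent units
    # (e.g. MiB and MiB/s); the value is returned in hours.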
    #
    def get_disk_repair_duration(self, disk_idx):
        if not self.use_network:
            # get the repair time from a pre-defined repair distribution
            return self.disk_repair_dists.draw()
        else:
            # repair time = cross-rack repair traffic / available cross-rack bandwidth
            rack_id = disk_idx / (self.nodes_per_rack * self.disks_per_node)
            cross_rack_download = 0
            stripes_to_repair = self.placement.get_stripes_to_repair(disk_idx)
            # self.num_stripes_repaired += len(stripes_to_repair)
            # stripes_to_delay = []

            # print("len(stripes_to_repair) = %d" % len(stripes_to_repair))
            # for each stripe to repair
            for stripe_id in stripes_to_repair:
                num_failed_chunk = 0
                num_alive_chunk_same_rack = 0
                idx = 0
                fail_idx = 0
                alive_chunk_same_rack = []

                # check the status of each chunk in the stripe
                for disk_id in self.placement.get_stripe_location(stripe_id):

                    # for RS, DRC
                    if self.placement.code_type != Placement.CODE_TYPE_LRC:
                        if self.disks[disk_id].get_curr_state(
                        ) == Disk.STATE_CRASHED:
                            num_failed_chunk += 1
                        elif (disk_id / (self.nodes_per_rack *
                                         self.disks_per_node)) == rack_id:
                            num_alive_chunk_same_rack += 1
                    # for LRC
                    else:
                        if self.disks[disk_id].get_curr_state(
                        ) == Disk.STATE_CRASHED:
                            num_failed_chunk += 1
                            if disk_idx == disk_id:
                                fail_idx = idx
                        elif (disk_id / (self.nodes_per_rack *
                                         self.disks_per_node)) == rack_id:
                            num_alive_chunk_same_rack += 1
                            alive_chunk_same_rack.append(idx)
                        idx += 1

                # # this is a single-chunk repair
                # if num_failed_chunk == 1:
                #     self.num_stripes_repaired_single_chunk += 1

                # RS
                if self.placement.code_type == Placement.CODE_TYPE_RS:
                    if num_alive_chunk_same_rack < self.k:
                        cross_rack_download += (self.k -
                                                num_alive_chunk_same_rack)
                # LRC
                elif self.placement.code_type == Placement.CODE_TYPE_LRC:
                    if num_failed_chunk == 1:
                        # global parity
                        if fail_idx in self.placement.lrc_global_parity:
                            if num_alive_chunk_same_rack < self.k:
                                cross_rack_download += self.k - num_alive_chunk_same_rack
                        # data chunk or local parity
                        else:
                            # find which group the failed chunk is in
                            fail_gid = 0
                            for gid in xrange(self.l):
                                if fail_idx in self.placement.lrc_data_group[gid] or \
                                        fail_idx == self.placement.lrc_local_parity[gid]:
                                    fail_gid = gid
                                    break
                            # find how many chunks in the same rack can be used for repair
                            num_alive_chunk_same_rack = 0
                            for each in alive_chunk_same_rack:
                                if each in self.placement.lrc_data_group[fail_gid] or \
                                        each == self.placement.lrc_local_parity[fail_gid]:
                                    num_alive_chunk_same_rack += 1
                            if num_alive_chunk_same_rack < self.k / self.l:
                                cross_rack_download += self.k / self.l - num_alive_chunk_same_rack
                    else:
                        if num_alive_chunk_same_rack < self.k:
                            cross_rack_download += (self.k -
                                                    num_alive_chunk_same_rack)
                # DRC
                elif self.placement.code_type == Placement.CODE_TYPE_DRC:
                    if num_failed_chunk == 1:
                        if self.k == 5 and self.n == 9:
                            cross_rack_download += 1.0
                        elif self.k == 6 and self.n == 9:
                            cross_rack_download += 2.0
                        else:
                            print "Only DRC (9,6,3) and (9,5,3) are supported"
                    else:
                        if num_alive_chunk_same_rack < self.k:
                            cross_rack_download += (self.k -
                                                    num_alive_chunk_same_rack)
                else:
                    print "Incorrect code type in get_disk_repair_duration()!"

            repair_duration = cross_rack_download * self.chunk_size / \
                              float(self.network.get_avail_cross_rack_repair_bwth()) # seconds
            # print "repair_time = %.1f" % (repair_duration / 3600.)
            # print("repair_duration = %.10f, cross_rack_download=%d" % \
            #        (repair_duration / 3600., cross_rack_download))

            if repair_duration != 0:
                self.total_repair_rate += 3600. / repair_duration
                self.total_repair_rate_cnt += 1

            return repair_duration / 3600.  # hours

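    ##
    # Return the time at which a new repair can start: the latest completion
    # time among the repairs already queued, or curr_time if the repair queue
    # is empty. Callers add the new repair duration to this value, so repairs
    # are effectively serialized.
    #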
    def get_earliest_repair_time(self, curr_time):
        earliest_repair_time = curr_time
        if len(self.repair_queue) > 0:
            for repair_event in self.repair_queue:
                repair_event_time = repair_event[0]
                if repair_event_time > earliest_repair_time:
                    earliest_repair_time = repair_event_time

        return earliest_repair_time

    ##
    # Set the next repair time for the disk indexed by disk_idx
    #
    def set_disk_repair(self, disk_idx, curr_time):
        heappush(self.repair_queue, (self.get_disk_repair_duration(disk_idx) +
                                     self.get_earliest_repair_time(curr_time),
                                     Disk.EVENT_DISK_REPAIR, disk_idx))

    ##
    # Set new node repair time for node node_idx
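    # The node repair duration is the sum of the repair durations of all the
    # disks on that node.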
    #
    def set_node_repair(self, node_idx, curr_time):
        node_repair_duration = 0
        # Get the repair duration of each disk on this node
        for i in xrange(self.disks_per_node):
            disk_idx = self.disks_per_node * node_idx + i
            node_repair_duration += self.get_disk_repair_duration(disk_idx)

        heappush(
            self.repair_queue,
            (node_repair_duration + self.get_earliest_repair_time(curr_time),
             Node.EVENT_NODE_REPAIR, node_idx))

    ##
    # Get the next event in UnifBFBSimulation
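    # In the OK state, the next disk/node failure is drawn per device by
    # inverse transform from its failure distribution. In the degraded state,
    # uniformization with failure biasing is used: candidate events arrive as
    # a homogeneous Poisson process with rate beta; unless a queued repair
    # completes first, a candidate is a real failure with probability fb_prob
    # (lr *= (failure_rate / beta) / fb_prob) and a pseudo event otherwise
    # (lr *= (1 - failure_rate / beta) / (1 - fb_prob)).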
    #
    def get_next_event(self, curr_time):
        # Update clock for each disk
        for disk in self.disks:
            disk.update_clock(curr_time)

        # Update clock for each node
        for node in self.nodes:
            node.update_clock(curr_time)

        # If not in a failed state, then draw for next failure
        if self.state.get_sys_state() == self.state.CURR_STATE_OK:
            failure_queue = []

            for each_disk in range(self.num_disks):
                failure_queue.append(
                    (self.disks[each_disk].disk_fail_distr.
                     draw_inverse_transform(self.disks[each_disk].read_clock())
                     + curr_time, Disk.EVENT_DISK_FAIL, each_disk))

            for each_node in range(self.num_nodes):
                failure_queue.append(
                    (self.nodes[each_node].node_fail_distr.
                     draw_inverse_transform(self.nodes[each_node].read_clock())
                     + curr_time, Node.EVENT_NODE_FAIL, each_node))

            heapify(failure_queue)
            (next_event_time, next_event_type,
             next_event_subsystem) = heappop(failure_queue)

            if next_event_type == Disk.EVENT_DISK_FAIL:
                self.disks[next_event_subsystem].fail_disk(next_event_time)
                self.set_disk_repair(next_event_subsystem, next_event_time)
            elif next_event_type == Node.EVENT_NODE_FAIL:
                self.nodes[next_event_subsystem].fail_node(next_event_time)
                for each_disk_on_this_node in range(
                        next_event_subsystem * self.disks_per_node,
                    (next_event_subsystem + 1) * self.disks_per_node):
                    self.disks[each_disk_on_this_node].fail_disk(
                        next_event_time)
                self.set_node_repair(next_event_subsystem, next_event_time)
            else:
                self.logger.error(
                    "UnifBFBSimulation - get_next_event(): wrong next_event_type!"
                )

            return (next_event_time, next_event_type, next_event_subsystem)

        elif self.state.get_sys_state() == self.state.CURR_STATE_DEGRADED:
            if not self.repair_queue:
                self.logger.error(
                    "UnifBFBSimulation - get_next_event(): repair_queue is empty!"
                )
                sys.exit(2)

            (repair_time, repair_event, subsystem_idx) = self.repair_queue[0]
            # inter-arrival time of the uniformized Poisson process (mean 1 / beta)
            next_event_time = nprandom.exponential(
                1. / self.poisson_rate) + curr_time

            if repair_time <= next_event_time:
                heappop(self.repair_queue)
                if repair_event == Disk.EVENT_DISK_REPAIR:
                    self.disks[subsystem_idx].repair_disk(repair_time)
                    return (repair_time, Disk.EVENT_DISK_REPAIR, subsystem_idx)
                elif repair_event == Node.EVENT_NODE_REPAIR:
                    self.nodes[subsystem_idx].repair_node()
                    for i in range(self.disks_per_node):
                        disk_idx = subsystem_idx * self.disks_per_node + i
                        self.disks[disk_idx].repair_disk(repair_time)
                    return (repair_time, Node.EVENT_NODE_REPAIR, subsystem_idx)
                else:
                    self.logger.error(
                        "UnifBFBSimulation - get_next_event(): wrong repair_event!"
                    )

            for disk in self.disks:
                disk.update_clock(next_event_time)
            for node in self.nodes:
                node.update_clock(next_event_time)

            self.total_failure_rate += self.get_failure_rate()
            self.total_failure_rate_cnt += 1

            draw = nprandom.uniform()
            # Determine whether it is a "real" event or "pseudo" event
            if draw > self.fb_prob:
                # It is a pseudo event
                old_lr = self.lr
                self.lr *= (1. - self.get_failure_rate() /
                            self.poisson_rate) / (1. - self.fb_prob)
                self.logger.debug(
                    "get_next_event(): pseudo event - old_lr = %.10f, update, lr = %.10f",
                    old_lr, self.lr)
                # Return nothing because we are staying in the current state
                return (next_event_time, None, None)

            else:
                # Randomly fail a disk or node
                # prob_node_failure = self.get_node_failure_prob()
                if nprandom.uniform() > self.get_node_failure_prob():
                    # disk failure
                    avail_disks = self.state.get_avail_disks()
                    fail_disk_idx = avail_disks[random.randint(
                        0,
                        len(avail_disks) - 1)]

                    old_lr = self.lr
                    # self.lr *= (self.disks[fail_disk_idx].curr_disk_fail_rate() / self.poisson_rate) \
                    #            / (self.fb_prob * (1 - prob_node_failure) / len(avail_disks))
                    # The above equation equals to the following
                    self.lr *= (self.get_failure_rate() /
                                self.poisson_rate) / self.fb_prob
                    self.logger.debug(
                        "get_next_event(): disk failure event - old_lr = %.10f, update, lr = %.10f",
                        old_lr, self.lr)

                    self.disks[fail_disk_idx].fail_disk(next_event_time)
                    self.set_disk_repair(fail_disk_idx, next_event_time)

                    return (next_event_time, Disk.EVENT_DISK_FAIL,
                            fail_disk_idx)

                else:
                    avail_nodes = self.state.get_avail_nodes()
                    fail_node_idx = avail_nodes[random.randint(
                        0,
                        len(avail_nodes) - 1)]

                    old_lr = self.lr
                    # self.lr *= (self.nodes[fail_node_idx].curr_node_fail_rate() / self.poisson_rate) \
                    #            / (self.fb_prob * prob_node_failure / len(avail_nodes))
                    # The above equation equals to the following
                    self.lr *= (self.get_failure_rate() /
                                self.poisson_rate) / self.fb_prob
                    self.logger.debug(
                        "get_next_event(): node failure event - old_lr = %.10f, update, lr = %.10f",
                        old_lr, self.lr)

                    # Update internal node state
                    self.nodes[fail_node_idx].fail_node(next_event_time)
                    for each_disk_on_failed_node in range(
                            fail_node_idx * self.disks_per_node,
                        (fail_node_idx + 1) * self.disks_per_node):
                        self.disks[each_disk_on_failed_node].fail_disk(
                            next_event_time)

                    # Schedule repair for the failed node
                    self.set_node_repair(fail_node_idx, next_event_time)

                    return (next_event_time, Node.EVENT_NODE_FAIL,
                            fail_node_idx)

    ##
    # Run an iteration in UnifBFBSimulation
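    # Runs until the mission time is reached or data loss occurs; on data loss
    # it returns the capped likelihood ratio together with the numbers of
    # failed stripes and lost chunks, otherwise it returns a weight of 0.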
    #
    def run_iteration(self, ite=0):
        self.reset()
        curr_time = 0
        self.logger.info(
            "UnifBFBSimulator: begin iteration %d, num_failed_disks = %d, "
            "avail_cross_rack_bwth = %d" %
            (ite, len(self.state.get_failed_disks()),
             self.network.get_avail_cross_rack_repair_bwth()))

        while True:
            (event_time, event_type,
             subsystem_idx) = self.get_next_event(curr_time)
            curr_time = event_time

            if event_time > self.mission_time:
                break

            if event_type is not None:
                self.logger.debug(
                    "Time: %.3f, event = %s, subsystem = %d, "
                    "number_failed_disks = %d, number_failed_nodes = %d" %
                    (event_time, event_type, subsystem_idx,
                     self.state.get_num_failed_disks(),
                     self.state.get_num_failed_nodes()))

                if not self.state.update_state_unifbfb(event_type,
                                                       subsystem_idx):
                    self.logger.error('update_state_unifbfb failed!')

            # Check durability when disk failure or node failure happens
            if event_type == Disk.EVENT_DISK_FAIL or event_type == Node.EVENT_NODE_FAIL:
                failed_disks = self.state.get_failed_disks()
                if self.placement.check_data_loss(failed_disks):
                    self.logger.debug(
                        "===== END of one iteration, self.lr = %.10f",
                        min(self.lr, 1))
                    (num_failed_stripes, num_lost_chunks
                     ) = self.placement.get_num_failed_status(failed_disks)
                    self.logger.info("avg_failure_rate = %.6f" %
                                     (self.total_failure_rate /
                                      self.total_failure_rate_cnt))
                    self.logger.info(
                        "avg_repair_rate = %.6f" %
                        (self.total_repair_rate / self.total_repair_rate_cnt))
                    return (min(self.lr, 1), "(%d, %d, 0, 0)" %
                            (num_failed_stripes, num_lost_chunks))

        # No data loss
        self.logger.debug(
            "END of one iteration, self.lr = 0 because no data loss")
        return (0, "(0, 0, 0, 0)")
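# The first element returned by run_iteration() is the importance-sampling
# weight of the iteration: the capped likelihood ratio if data loss occurred,
# 0 otherwise, so the unreliability estimate is the mean of these weights.
# Minimal aggregation sketch (not part of the original example), assuming
# `sim` is an already-configured UnifBFBSimulation instance; construction and
# parameter wiring are framework-specific and omitted here.
def estimate_unreliability(sim, num_iterations=1000):
    total_weight = 0.0
    for ite in xrange(num_iterations):
        weight, _details = sim.run_iteration(ite)
        total_weight += weight
    return total_weight / num_iterations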
Example 2
class RegularSimulation(Simulation):
    ##
    # __init__() from Simulation
    #

    ##
    # Initialize the simulation
    #
    def init(self):
        # Initialize the state of the system
        self.state = State(self.num_disks)

        # Employ priority queue to keep all the failures and repairs
        # The element in the queue is (event_time, event_type, device_id)
        self.events_queue = []

        # Keep failed disks awaiting repair
        self.wait_repair_queue = []

        # Keep delayed stripes due to unavailable nodes
        # Key is the disk_idx delayed, value is the list of delayed stripes
        self.delayed_repair_dict = dict()

        self.enable_transient_failures = False

        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.ERROR)
        # self.logger.setLevel(logging.INFO)
        self.logger.addHandler(console)
        self.logger.propagate = False

    ##
    # Reset the simulation
    #
    def reset(self, ite=0):
        # Generate node transient and permanent failure events from trace
        if self.use_trace:
            for i in xrange(self.num_nodes):
                self.nodes[i] = Node(None, None, None,
                                     Trace(self.trace_id, i, 'p'),
                                     Trace(self.trace_id, i, 't'),
                                     Trace(self.trace_id, i, 'r'))

        self.state = State(self.num_disks)

        for disk in self.disks:
            disk.init_clock(0)
            disk.init_state()
        for node in self.nodes:
            node.init_state()
        for rack in self.racks:
            rack.init_state()

        self.events_queue = []
        self.wait_repair_queue = []
        self.delayed_repair_dict = dict()

        # generate disk failures and put them into events_queue
        for disk_id in xrange(len(self.disks)):
            disk_fail_time = self.disk_fail_dists.draw()
            if disk_fail_time <= self.mission_time:
                self.events_queue.append(
                    (disk_fail_time, Disk.EVENT_DISK_FAIL, disk_id))
        # generate node failures and push them into events_queue
        for node_id in xrange(self.num_nodes):
            if not self.use_trace:
                self.events_queue.append((self.node_fail_dists.draw(),
                                          Node.EVENT_NODE_FAIL, node_id))
                if self.enable_transient_failures:
                    self.events_queue.append(
                        (self.node_transient_fail_dists.draw(),
                         Node.EVENT_NODE_TRANSIENT_FAIL, node_id))
            else:
                for node_failure_time in self.nodes[
                        node_id].node_fail_trace.get_trace_ls():
                    # push node failure event to event_queue
                    self.events_queue.append(
                        (node_failure_time, Node.EVENT_NODE_FAIL, node_id))
                node_transient_failure_ls = self.nodes[
                    node_id].node_transient_fail_trace.get_trace_ls()
                node_transient_repair_ls = self.nodes[
                    node_id].node_transient_repair_trace.get_trace_ls()
                for ls_idx in xrange(len(node_transient_failure_ls)):
                    node_transient_failure_time = node_transient_failure_ls[
                        ls_idx]
                    node_transient_repair_time = node_transient_repair_ls[
                        ls_idx]
                    self.events_queue.append(
                        (node_transient_failure_time,
                         Node.EVENT_NODE_TRANSIENT_FAIL, node_id))
                    self.events_queue.append(
                        (node_transient_failure_time +
                         node_transient_repair_time,
                         Node.EVENT_NODE_TRANSIENT_REPAIR, node_id))

        # generate rack failures and push them into events_queue
        if not self.use_power_outage and self.enable_transient_failures:
            for rack_id in xrange(len(self.racks)):
                self.events_queue.append((self.rack_fail_dists.draw(),
                                          Rack.EVENT_RACK_FAIL, rack_id))

        # correlated failures caused by power outage
        if (not self.use_trace) and self.use_power_outage:
            for rack_id in xrange(self.num_racks):
                occur_time = float(0) + self.power_outage_dist.draw()
                while occur_time < self.mission_time:
                    self.events_queue.append(
                        (occur_time, Rack.EVENT_RACK_FAIL, rack_id))
                    occur_time += random.expovariate(
                        (1 / float(self.power_outage_duration)))
                    self.events_queue.append(
                        (occur_time, Rack.EVENT_RACK_REPAIR, rack_id))
                    for i in xrange(self.nodes_per_rack):
                        # Bernoulli draw: each node on the rack fails permanently with probability 0.01 during the outage
                        if nprandom.binomial(1, 0.01):
                            self.events_queue.append(
                                (occur_time, Node.EVENT_NODE_FAIL,
                                 (self.nodes_per_rack * rack_id + i)))
                    occur_time += self.power_outage_dist.draw()

        heapify(self.events_queue)
        self.placement = Placement(self.num_racks, self.nodes_per_rack,
                                   self.disks_per_node, self.capacity_per_disk,
                                   self.num_stripes, self.chunk_size,
                                   self.code_type, self.n, self.k,
                                   self.place_type, self.chunk_rack_config,
                                   self.l)

        self.network = Network(self.num_racks, self.nodes_per_rack,
                               self.network_setting)

        self.num_stripes_repaired = 0
        self.num_stripes_repaired_single_chunk = 0
        self.num_stripes_delayed = 0

    ##
    # Generate permanent disk failure event
    #
    def set_disk_fail(self, disk_idx, curr_time):
        heappush(self.events_queue, (self.disk_fail_dists.draw() + curr_time,
                                     Disk.EVENT_DISK_FAIL, disk_idx))

    ##
    # Generate repair event for permanent disk failure
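    # If no cross-rack bandwidth is available or the rack is under a transient
    # failure, the disk is queued in wait_repair_queue. Otherwise the repair
    # time is the cross-rack repair traffic divided by the available
    # cross-rack bandwidth (which is reserved for this repair), and stripes
    # with more than n - k unavailable chunks are recorded in
    # delayed_repair_dict.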
    #
    def set_disk_repair(self, disk_idx, curr_time):
        if not self.use_network:
            # get the repair time from a pre-defined repair distribution
            heappush(self.events_queue,
                     (self.disk_repair_dists.draw() + curr_time,
                      Disk.EVENT_DISK_REPAIR, disk_idx))
        else:
            # repair time = cross-rack repair traffic / available cross-rack bandwidth
            rack_id = disk_idx / (self.nodes_per_rack * self.disks_per_node)

            # If there is no available bandwidth or the rack is under transient failure
            if self.network.get_avail_cross_rack_repair_bwth() == 0 or \
                self.racks[rack_id].get_curr_state() != Rack.STATE_RACK_NORMAL:
                heappush(self.wait_repair_queue, (curr_time, disk_idx))
            else:
                cross_rack_download = 0
                stripes_to_repair = self.placement.get_stripes_to_repair(
                    disk_idx)
                self.num_stripes_repaired += len(stripes_to_repair)
                stripes_to_delay = []

                # for each stripe to repair
                for stripe_id in stripes_to_repair:
                    num_failed_chunk = 0
                    num_alive_chunk_same_rack = 0
                    num_unavail_chunk = 0
                    idx = 0
                    fail_idx = 0
                    alive_chunk_same_rack = []

                    # check the status of each chunk in the stripe
                    for disk_id in self.placement.get_stripe_location(
                            stripe_id):
                        # get the total number of unavailable chunks (due to permanent/transient failures) in this stripe
                        if self.disks[disk_id].state != Disk.STATE_NORMAL:
                            num_unavail_chunk += 1

                        # for RS, DRC
                        if self.placement.code_type != Placement.CODE_TYPE_LRC:
                            if self.disks[disk_id].get_curr_state(
                            ) == Disk.STATE_CRASHED:
                                num_failed_chunk += 1
                            elif (disk_id / (self.nodes_per_rack *
                                             self.disks_per_node)) == rack_id:
                                num_alive_chunk_same_rack += 1
                        # for LRC
                        else:
                            if self.disks[disk_id].get_curr_state(
                            ) == Disk.STATE_CRASHED:
                                num_failed_chunk += 1
                                if disk_idx == disk_id:
                                    fail_idx = idx
                            elif (disk_id / (self.nodes_per_rack *
                                             self.disks_per_node)) == rack_id:
                                num_alive_chunk_same_rack += 1
                                alive_chunk_same_rack.append(idx)
                            idx += 1

                    # this is a single-chunk repair
                    if num_failed_chunk == 1:
                        self.num_stripes_repaired_single_chunk += 1
                    # the repair for this stripe is delayed
                    if num_unavail_chunk > (self.n - self.k):
                        stripes_to_delay.append(stripe_id)

                    # RS
                    if self.placement.code_type == Placement.CODE_TYPE_RS:
                        if num_alive_chunk_same_rack < self.k:
                            cross_rack_download += (self.k -
                                                    num_alive_chunk_same_rack)
                    # LRC
                    elif self.placement.code_type == Placement.CODE_TYPE_LRC:
                        if num_failed_chunk == 1:
                            # global parity
                            if fail_idx in self.placement.lrc_global_parity:
                                if num_alive_chunk_same_rack < self.k:
                                    cross_rack_download += self.k - num_alive_chunk_same_rack
                            # data chunk or local parity
                            else:
                                # find which group the failed chunk is in
                                fail_gid = 0
                                for gid in xrange(self.l):
                                    if fail_idx in self.placement.lrc_data_group[gid] or \
                                        fail_idx == self.placement.lrc_local_parity[gid]:
                                        fail_gid = gid
                                        break
                                # find how many chunks in the same rack can be used for repair
                                num_alive_chunk_same_rack = 0
                                for each in alive_chunk_same_rack:
                                    if each in self.placement.lrc_data_group[fail_gid] or \
                                        each == self.placement.lrc_local_parity[fail_gid]:
                                        num_alive_chunk_same_rack += 1
                                if num_alive_chunk_same_rack < self.k / self.l:
                                    cross_rack_download += self.k / self.l - num_alive_chunk_same_rack
                        else:
                            if num_alive_chunk_same_rack < self.k:
                                cross_rack_download += (
                                    self.k - num_alive_chunk_same_rack)
                    # DRC
                    elif self.placement.code_type == Placement.CODE_TYPE_DRC:
                        if num_failed_chunk == 1:
                            if self.k == 5 and self.n == 9:
                                cross_rack_download += 1.0
                            elif self.k == 6 and self.n == 9:
                                cross_rack_download += 2.0
                            else:
                                print "Only DRC (9,6,3) and (9,5,3) are supported"
                        else:
                            if num_alive_chunk_same_rack < self.k:
                                cross_rack_download += (
                                    self.k - num_alive_chunk_same_rack)
                    else:
                        print "Incorrect code type in set_disk_repair()!"

                repair_bwth = self.network.get_avail_cross_rack_repair_bwth()
                self.network.update_avail_cross_rack_repair_bwth(0)
                repair_time = cross_rack_download * self.chunk_size / float(
                    repair_bwth)  # seconds
                repair_time /= float(3600)  # hours

                if len(stripes_to_delay) != 0:
                    self.num_stripes_delayed += len(stripes_to_delay)
                    self.delayed_repair_dict[disk_idx] = stripes_to_delay

                self.logger.debug("repair_time = %.6f, repair_bwth = %d" %
                                  (repair_time, repair_bwth))
                heappush(self.events_queue,
                         (repair_time + curr_time, Disk.EVENT_DISK_REPAIR,
                          disk_idx, repair_bwth))

    ##
    # Generate permanent node failure event
    #
    def set_node_fail(self, node_idx, curr_time):
        heappush(self.events_queue, (self.node_fail_dists.draw() + curr_time,
                                     Node.EVENT_NODE_FAIL, node_idx))

    ##
    # Generate repair event for permanent node failure
    # The failed node is repaired by repairing each of the failed disks on that node
    #
    def set_node_repair(self, node_idx, curr_time):
        for i in xrange(self.disks_per_node):
            disk_idx = node_idx * self.disks_per_node + i
            self.set_disk_repair(disk_idx, curr_time)

    ##
    # Generate transient node failure event
    #
    def set_node_transient_fail(self, node_idx, curr_time):
        heappush(self.events_queue,
                 (self.nodes[node_idx].node_transient_fail_distr.draw() +
                  curr_time, Node.EVENT_NODE_TRANSIENT_FAIL, node_idx))

    ##
    # Generate repair event for transient node failure
    #
    def set_node_transient_repair(self, node_idx, curr_time):
        heappush(self.events_queue,
                 (self.nodes[node_idx].node_transient_repair_distr.draw() +
                  curr_time, Node.EVENT_NODE_TRANSIENT_REPAIR, node_idx))

    ##
    # Generate transient rack failure
    #
    def set_rack_fail(self, rack_idx, curr_time):
        heappush(self.events_queue, (self.rack_fail_dists.draw() + curr_time,
                                     Rack.EVENT_RACK_FAIL, rack_idx))

    ##
    # Generate repair for transient rack failure
    #
    def set_rack_repair(self, rack_idx, curr_time):
        heappush(self.events_queue, (self.rack_repair_dists.draw() + curr_time,
                                     Rack.EVENT_RACK_REPAIR, rack_idx))

    ##
    # Get the next event from the event queue
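    # Re-examines delayed stripes and failed disks waiting for repair, pops
    # the next event, batches all further events with the same timestamp and
    # event type, and dispatches them; returns the event time, the event type
    # and the set of affected devices (None for transient/rack events).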
    #
    def get_next_event(self, curr_time):
        self.logger.debug(
            "len(delayed_repair_dict) = %d, len(wait_repair_queue) = %d" %
            (len(self.delayed_repair_dict), len(self.wait_repair_queue)))
        # If there are some stripes delayed
        if len(self.delayed_repair_dict) != 0:
            items_to_remove = []  # keep the key of the items to remove
            for key in self.delayed_repair_dict:
                tmp_dict_value = []
                for stripe_id in self.delayed_repair_dict[key]:
                    repair_delay = False
                    num_unavail_chunk = 0
                    for disk_idx in self.placement.get_stripe_location(
                            stripe_id):
                        if self.disks[disk_idx].state != Disk.STATE_NORMAL:
                            num_unavail_chunk += 1
                        if num_unavail_chunk > (self.n - self.k):
                            repair_delay = True
                            break
                    if repair_delay:  # stripe whose repair is delayed
                        tmp_dict_value.append(stripe_id)
                if len(tmp_dict_value) == 0:
                    items_to_remove.append(key)
                else:
                    self.delayed_repair_dict[key] = tmp_dict_value
            for key in items_to_remove:
                self.delayed_repair_dict.pop(key)

        # If there are some failed disks awaiting repair
        if len(self.wait_repair_queue) != 0:
            disk_id = self.wait_repair_queue[0][1]
            rack_id = disk_id / (self.nodes_per_rack * self.disks_per_node)
            if self.use_network and self.network.get_avail_cross_rack_repair_bwth() != 0 and \
                self.network.get_avail_intra_rack_repair_bwth(rack_id) != 0 and \
                self.racks[rack_id].get_curr_state() == Rack.STATE_RACK_NORMAL:
                heappop(self.wait_repair_queue)
                self.set_disk_repair(disk_id, curr_time)

        next_event = heappop(self.events_queue)
        next_event_time = next_event[0]
        next_event_type = next_event[1]
        if next_event_time > self.mission_time:
            return (next_event_time, None, None)

        device_idx_set = []
        device_idx_set.append(next_event[2])
        repair_bwth_set = []
        # If use network bandwidth to calculate repair_time
        if self.use_network and next_event_type == Disk.EVENT_DISK_REPAIR:
            repair_bwth_set.append(next_event[3])

        # Gather the events with the same occurring time and event type
        while self.events_queue[0][0] == next_event_time and self.events_queue[
                0][1] == next_event_type:
            next_event = heappop(self.events_queue)
            device_idx_set.append(next_event[2])
            if self.use_network and next_event_type == Disk.EVENT_DISK_REPAIR:
                repair_bwth_set.append(next_event[3])

        # disk permanent failure
        if next_event_type == Disk.EVENT_DISK_FAIL:
            fail_time = next_event_time
            for device_idx in device_idx_set:
                # avoid the case that this disk is under repair
                if self.disks[device_idx].get_curr_state(
                ) != Disk.STATE_CRASHED:
                    if self.delayed_repair_dict.has_key(device_idx):
                        self.delayed_repair_dict.pop(device_idx)
                    # update the state of the disk
                    self.disks[device_idx].fail_disk(fail_time)
                    # generate the repair event
                    self.set_disk_repair(device_idx, fail_time)
            return (fail_time, Disk.EVENT_DISK_FAIL, device_idx_set)

        # node permanent failure
        elif next_event_type == Node.EVENT_NODE_FAIL:
            failed_disks_set = set([])
            fail_time = next_event_time
            for device_idx in device_idx_set:
                # avoid the case that the node is under repair
                if self.nodes[device_idx].get_curr_state(
                ) != Node.STATE_NODE_CRASHED:
                    # update the state of node
                    self.nodes[device_idx].fail_node(fail_time)
                    for i in xrange(self.disks_per_node):
                        disk_idx = device_idx * self.disks_per_node + i
                        failed_disks_set.add(disk_idx)
                        # avoid the case that the disk is under repair
                        if self.disks[disk_idx].get_curr_state(
                        ) != Disk.STATE_CRASHED:
                            if self.delayed_repair_dict.has_key(disk_idx):
                                self.delayed_repair_dict.pop(disk_idx)
                            # update the state of the disk
                            self.disks[disk_idx].fail_disk(fail_time)
                            # generate the repair event
                            self.set_disk_repair(disk_idx, fail_time)
            return (fail_time, Node.EVENT_NODE_FAIL, failed_disks_set)

        # node transient failure
        elif next_event_type == Node.EVENT_NODE_TRANSIENT_FAIL:
            fail_time = next_event_time
            for device_idx in device_idx_set:
                if self.nodes[device_idx].get_curr_state(
                ) == Node.STATE_NODE_NORMAL:
                    # update the state of node
                    self.nodes[device_idx].offline_node()
                    for i in xrange(self.disks_per_node):
                        disk_id = device_idx * self.disks_per_node + i
                        if self.disks[disk_id].get_curr_state(
                        ) == Disk.STATE_NORMAL:
                            # update the state of disk
                            self.disks[disk_id].offline_disk(fail_time)
                # generate the repair event
                if not self.use_trace:
                    self.set_node_transient_repair(device_idx, fail_time)

            return (fail_time, Node.EVENT_NODE_TRANSIENT_FAIL, None)

        # transient rack failure
        elif next_event_type == Rack.EVENT_RACK_FAIL:
            fail_time = next_event_time
            for device_idx in device_idx_set:
                if self.racks[device_idx].get_curr_state(
                ) == Rack.STATE_RACK_NORMAL:
                    # update the state of the rack
                    self.racks[device_idx].fail_rack(fail_time)
                    for i in xrange(self.nodes_per_rack):
                        # update the state of the node
                        node_idx = device_idx * self.nodes_per_rack + i
                        if self.nodes[node_idx].get_curr_state(
                        ) == Node.STATE_NODE_NORMAL:
                            self.nodes[node_idx].offline_node()
                            for j in xrange(self.disks_per_node):
                                # update the state of the disk
                                disk_idx = node_idx * self.disks_per_node + j
                                if self.disks[disk_idx].get_curr_state(
                                ) == Disk.STATE_NORMAL:
                                    self.disks[disk_idx].offline_disk(
                                        fail_time)
                # generate the repair event
                if not self.use_power_outage:
                    self.set_rack_repair(device_idx, fail_time)

            return (fail_time, Rack.EVENT_RACK_FAIL, None)

        # repair for permanent disk failure
        elif next_event_type == Disk.EVENT_DISK_REPAIR:
            repair_time = next_event_time
            for repair_disk_idx in device_idx_set:
                if self.disks[repair_disk_idx].get_curr_state(
                ) == Disk.STATE_CRASHED:
                    # update the state of the disk
                    self.disks[repair_disk_idx].repair_disk(repair_time)
                    # generate next permanent disk failure
                    self.set_disk_fail(repair_disk_idx, repair_time)

                # if the repair event is caused by permanent node failure
                node_idx = repair_disk_idx / self.disks_per_node
                if self.nodes[node_idx].get_curr_state(
                ) == Node.STATE_NODE_CRASHED:
                    all_disk_ok = True
                    for i in xrange(self.disks_per_node):
                        disk = self.disks[node_idx * self.disks_per_node + i]
                        if disk.get_curr_state() != disk.STATE_NORMAL:
                            all_disk_ok = False
                            break
                    if all_disk_ok:
                        # update the state of the node
                        self.nodes[node_idx].repair_node()
                        # generate next permanent node failure
                        if not self.use_trace:
                            self.set_node_fail(node_idx, repair_time)
            # update the network status
            if self.use_network:
                idx = 0
                for repair_disk_idx in device_idx_set:
                    repair_bwth = repair_bwth_set[idx]
                    self.network.update_avail_cross_rack_repair_bwth(
                        self.network.get_avail_cross_rack_repair_bwth() +
                        repair_bwth)
                    idx += 1

            # return the set of repaired disks
            return (repair_time, Disk.EVENT_DISK_REPAIR, device_idx_set)

        # repair for node transient failure
        elif next_event_type == Node.EVENT_NODE_TRANSIENT_REPAIR:
            repair_time = next_event_time
            for repair_node_idx in device_idx_set:
                # update the state of the node
                if self.nodes[repair_node_idx].get_curr_state(
                ) == Node.STATE_NODE_UNAVAILABLE:
                    self.nodes[repair_node_idx].online_node()
                    # update the state of the disk
                    for i in xrange(self.disks_per_node):
                        disk_id = repair_node_idx * self.disks_per_node + i
                        if self.disks[disk_id].get_curr_state(
                        ) == Disk.STATE_UNAVAILABLE:
                            self.disks[disk_id].online_disk(repair_time)
                # generate the next transient node failure
                if not self.use_trace:
                    self.set_node_transient_fail(repair_node_idx, repair_time)
            return (repair_time, Node.EVENT_NODE_TRANSIENT_REPAIR, None)

        # repair for rack transient failure
        elif next_event_type == Rack.EVENT_RACK_REPAIR:
            repair_time = next_event_time
            for repair_rack_idx in device_idx_set:
                if self.racks[repair_rack_idx].get_curr_state(
                ) == Rack.STATE_RACK_UNAVAILABLE:
                    # update the state of the rack
                    self.racks[repair_rack_idx].repair_rack()
                    for i in xrange(self.nodes_per_rack):
                        node_idx = repair_rack_idx * self.nodes_per_rack + i
                        # update the state of the node
                        if self.nodes[node_idx].get_curr_state(
                        ) == Node.STATE_NODE_UNAVAILABLE:
                            self.nodes[node_idx].online_node()
                            for j in xrange(self.disks_per_node):
                                disk_idx = node_idx * self.disks_per_node + j
                                # update the state of the disk
                                if self.disks[disk_idx].get_curr_state(
                                ) == Disk.STATE_UNAVAILABLE:
                                    self.disks[disk_idx].online_disk(
                                        repair_time)
                # generate the next transient rack failure
                if not self.use_power_outage:
                    self.set_rack_fail(repair_rack_idx, repair_time)

            return (repair_time, Rack.EVENT_RACK_REPAIR, None)

        else:
            self.logger.error('Wrong type of next_event in get_next_event()!')
            return None

    ##
    # Run an iteration of the simulator
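    # Runs until the mission time is reached or data loss occurs; returns a
    # data-loss flag together with the numbers of failed stripes and lost
    # chunks, the blocked ratio and the single-chunk repair ratio. Averaging
    # the flag over iterations yields the estimated probability of data loss.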
    #
    def run_iteration(self, ite=0):
        self.reset()
        curr_time = 0

        self.logger.info(
            "Regular Simulator: begin iteration %d, num_failed_disks = %d, "
            "avail_cross_rack_bwth = %d" %
            (ite, len(self.state.get_failed_disks()),
             self.network.get_avail_cross_rack_repair_bwth()))

        while True:
            (event_time, event_type,
             disk_id_set) = self.get_next_event(curr_time)
            curr_time = event_time
            if curr_time > self.mission_time:
                break
            # update the whole status
            if not self.state.update_state(event_type, disk_id_set):
                self.logger.error('update_state failed!')
            if event_type is not None:
                self.logger.debug(
                    "Time %s, Event type: %s, Number of failed disks: %s\n" %
                    (event_time, event_type,
                     self.state.get_num_failed_disks()))

            # Check durability when disk_failure/node_failure happens
            if event_type == Disk.EVENT_DISK_FAIL or event_type == Node.EVENT_NODE_FAIL:
                if ite == 1:
                    self.logger.info(
                        "Time %s, Event type: %s, Number of failed disks: %s\n"
                        % (event_time, event_type,
                           self.state.get_num_failed_disks()))
                failed_disks = self.state.get_failed_disks()
                if self.placement.check_data_loss(failed_disks):
                    # the number of failed stripes and the number of lost chunks
                    (num_failed_stripes, num_lost_chunks
                     ) = self.placement.get_num_failed_status(failed_disks)
                    # Count in the delayed stripes
                    if len(self.delayed_repair_dict) != 0:
                        for key in self.delayed_repair_dict:
                            num_failed_stripes += len(
                                self.delayed_repair_dict[key])
                            num_lost_chunks += len(
                                self.delayed_repair_dict[key])
                    # Calculate blocked ratio: chunk-weighted unavailable time / total chunk-time
                    sum_unavail_time = 0
                    for disk_id in xrange(self.num_disks):
                        sum_unavail_time += self.disks[disk_id].get_unavail_time(curr_time) * \
                                            self.placement.get_num_chunks_per_disk(disk_id)
                    blocked_ratio = sum_unavail_time / (
                        self.placement.num_chunks * curr_time)
                    # Calculate the single-chunk repair ratio
                    single_chunk_repair_ratio = 0
                    self.logger.info(
                        "num_stripes_repaired_single_chunk = %d, num_stripes_repaired = %d"
                        % (self.num_stripes_repaired_single_chunk,
                           self.num_stripes_repaired))

                    if self.num_stripes_repaired != 0:
                        single_chunk_repair_ratio = float(self.num_stripes_repaired_single_chunk) / \
                                                    float(self.num_stripes_repaired)

                    return (1, "(%d, %d, %f, %f)" %
                            (num_failed_stripes, num_lost_chunks,
                             blocked_ratio, single_chunk_repair_ratio))

        # No data loss
        # Calculate blocked ratio: chunk-weighted unavailable time / total chunk-time
        sum_unavail_time = 0
        for disk_id in xrange(self.num_disks):
            sum_unavail_time += self.disks[disk_id].get_unavail_time(self.mission_time) * \
                                self.placement.get_num_chunks_per_disk(disk_id)
        blocked_ratio = sum_unavail_time / (self.placement.num_chunks *
                                            self.mission_time)
        # Calculate the single-chunk repair ratio
        single_chunk_repair_ratio = 0
        if self.num_stripes_repaired != 0:
            single_chunk_repair_ratio = float(self.num_stripes_repaired_single_chunk) / \
                                        float(self.num_stripes_repaired)

        return (0,
                "(0, 0, %f, %f)" % (blocked_ratio, single_chunk_repair_ratio))