def addCorrelatedFailures(self, cf_info):
    """Inject one correlated-failure event into the system tree.

    cf_info layout (positional):
        cf_info[0]  -- failure start time
        cf_info[1]  -- scope spec for unavailable components (e.g. "1_rack")
        cf_info[2]  -- unavailable duration
        cf_info[3]  -- scope spec for lost components, or None for no loss
        cf_info[4]  -- lost duration
        cf_info[-1] -- choose_from_una flag: when true, lost components are
                       sampled from the already-unavailable ones

    Returns the root of the system tree.
    """
    choose_from_una = cf_info[-1]
    # interval format: [failure start time, failure end time, lost flag]
    #   lost flag = False -> unavailable event (default)
    #   lost flag = True  -> lost event
    una_interval = [cf_info[0], cf_info[2] + cf_info[0], False]
    una_components = self._failedComponents(cf_info[1], una_interval)
    if cf_info[3] is not None:
        dl_interval = [cf_info[0], cf_info[4] + cf_info[0], True]
        if cf_info[3] == cf_info[1]:
            # Lost scope identical to unavailable scope: every unavailable
            # component also loses its data.
            for component in una_components:
                component.addFailureInterval(dl_interval)
        elif choose_from_una:
            failed_num = splitMethod(cf_info[3], '_')
            components_for_choosen = []
            if failed_num[1] == "machine":
                for component in una_components:
                    if isinstance(component, Rack):
                        for child in component.getChildren():
                            components_for_choosen.append(child)
                    if isinstance(component, Machine):
                        components_for_choosen.append(component)
                dl_components = sample(components_for_choosen,
                                       int(failed_num[0]))
            elif failed_num[1] == "disk":
                for component in una_components:
                    if isinstance(component, Rack):
                        disks = []
                        self.distributer.getAllDisksInRack(component, disks)
                        for disk in disks:
                            components_for_choosen.append(disk)
                    if isinstance(component, Machine):
                        for disk in component.getChildren():
                            components_for_choosen.append(disk)
                    if isinstance(component, Disk):
                        # BUG FIX: the original appended `disk` here -- a
                        # stale loop variable from the branches above (or
                        # unbound) -- instead of the Disk component itself.
                        components_for_choosen.append(component)
                dl_components = sample(components_for_choosen,
                                       int(failed_num[0]))
            else:
                raise Exception("Unsupport lost event!")
            for component in dl_components:
                component.addFailureInterval(dl_interval)
        else:
            # here, unavailable component should be excluded.
            # But, a little complicated to implement.
            dl_components = self._failedComponents(cf_info[3], dl_interval)
            for component in dl_components:
                # Only convert to a loss if the component (or an ancestor)
                # is already in the unavailable set.
                if (component in una_components) or \
                   (component.getParent() in una_components) or \
                   (component.getParent().getParent() in una_components):
                    component.updateFailureInterval(
                        dl_interval, cf_info[2] + cf_info[0])
    return self.distributer.getRoot()
def _failedComponents(self, info, interval):
    """Sample components to fail from a scope spec and mark them failed.

    info: spec string of (amount, type) pairs joined by '_',
          e.g. "1_rack" or "2_machine_3_disk".
    interval: [start time, end time, lost flag] added to every chosen
          component via addFailureInterval.

    Returns the list of chosen (failed) components.
    """
    # BUG FIX: str.strip() returns a new string; the original called it
    # and discarded the result, so surrounding whitespace was never removed.
    info = info.strip()
    scope = splitMethod(info, '_')
    failure_components = []
    failed_amounts = []
    length = len(scope) // 2
    for i in xrange(length):
        # group the flat token list into [amount, component-type] pairs
        failed_amounts.append(scope[2 * i:2 * (i + 1)])
    # racks come from the system tree, so we must not modify that list.
    racks = self.distributer.getAllRacks()
    for num, component in failed_amounts:
        if component == "rack":
            failed_racks = sample(racks, int(num))
            for rack in failed_racks:
                rack.addFailureInterval(interval)
                failure_components.append(rack)
        elif component == "machine":
            machines = []
            for rack in racks:
                # skip racks that already failed as a whole
                if rack in failure_components:
                    continue
                else:
                    machines += rack.getChildren()
            failed_machines = sample(machines, int(num))
            for machine in failed_machines:
                machines.remove(machine)
                machine.addFailureInterval(interval)
                failure_components.append(machine)
        elif component == "disk":
            disks = []
            for rack in racks:
                # skip racks that already failed as a whole
                if rack in failure_components:
                    continue
                rack_disks = []
                self.distributer.getAllDisksInRack(rack, rack_disks)
                disks += rack_disks
            failed_disks = sample(disks, int(num))
            for disk in failed_disks:
                disk.addFailureInterval(interval)
                failure_components.append(disk)
    return failure_components
def insertUpgradeDurations(self, upgrade_infos):
    """Build rolling-upgrade Duration objects across the simulated time.

    upgrade_infos[0] -- rolling frequency (time between rollings)
    upgrade_infos[1] -- upgrade domain spec, e.g. "2_rack"
    A length of 6 selects hard upgrades, a length of 4 soft upgrades.
    """
    freq = int(upgrade_infos[0])
    domain = upgrade_infos[1]
    unit_num, unit = splitMethod(domain, '_')
    num = int(unit_num)
    upgrade_durations = []
    num_of_rollings = int(self.conf.total_time) / freq
    # random offset of the first rolling inside the leftover time window
    start_ts = round(uniform(0, float(self.conf.total_time) % freq), 2)
    for j in xrange(1, num_of_rollings + 1):
        rolling_start = start_ts + j * freq
        if unit == "rack":
            # NOTE(review): `downtime` is not defined anywhere in this
            # method; it presumably should be parsed out of upgrade_infos.
            # Confirm its source and bind it before this loop.
            loops = self.conf.rack_count / num
            remainder = self.conf.rack_count % num
            racks = self.distributer.getAllRacks()
            for i in xrange(loops):
                start_time = rolling_start + downtime * i
                # BUG FIX: the original indexed racks with a tuple,
                # racks[i * num, (i + 1) * num], which raises TypeError;
                # a slice of `num` racks per upgrade domain is intended.
                duration = Duration(Duration.DurationType.Unavailable,
                                    start_time, start_time + downtime,
                                    racks[i * num:(i + 1) * num])
                upgrade_durations.append(duration)
            if remainder != 0:
                # leftover racks that do not fill a whole upgrade domain
                start_time = rolling_start + downtime * loops
                duration = Duration(Duration.DurationType.Unavailable,
                                    start_time, start_time + downtime,
                                    racks[-remainder:])
                upgrade_durations.append(duration)
        elif unit == "machine":
            pass
        else:
            raise Exception("Upgrade domain must be in rack or machine")
    if len(upgrade_infos) == 6:
        self.insertHardUpgrades(upgrade_infos)
    if len(upgrade_infos) == 4:
        # BUG FIX: the original passed `soft_infos`, an undefined name;
        # pass the parsed upgrade_infos through, matching the
        # hard-upgrade branch above.
        self.insertSoftUpgrades(upgrade_infos)
def __init__(self, path=None):
    """Load simulator configuration from the DEFAULT section of an INI file.

    path: configuration file path; falls back to Configuration.path.
    Raises Exception when the file has no DEFAULT section.
    Note: d.pop(...) calls below mutate the parsed defaults dict, so the
    order of these reads matters.
    """
    if path is None:
        conf_path = Configuration.path
    else:
        conf_path = path
    self.conf = getConfParser(conf_path)
    try:
        d = self.conf.defaults()
    except ConfigParser.NoSectionError:
        raise Exception("No Default Section!")
    self.total_time = int(d["total_time"])
    # total active storage in PBs
    self.total_active_storage = float(d["total_active_storage"])
    self.chunk_size = int(d["chunk_size"])
    self.disk_capacity = float(d["disk_capacity"])
    # translate TB of manufacturer (10^12 bytes) into GBs (2^30 bytes)
    self.actual_disk_capacity = self.disk_capacity * pow(10, 12) / pow(2, 30)
    # capacity is in GB, so *1024 converts to MB before dividing by chunk size
    self.max_chunks_per_disk = int(floor(self.actual_disk_capacity * 1024 / self.chunk_size))
    self.disks_per_machine = int(d["disks_per_machine"])
    self.machines_per_rack = int(d["machines_per_rack"])
    self.rack_count = int(d["rack_count"])
    self.datacenters = int(d.pop("datacenters", 1))
    self.event_file = d.pop("event_file", None)
    # If n <= 15 in each stripe, no two chunks are on the same rack.
    self.num_chunks_diff_racks = 15
    self.data_placement = d["data_placement"]
    if self.data_placement.lower() == "copyset":
        # scatter_width is only required for copyset placement
        self.scatter_width = int(d["scatter_width"])
    self.data_redundancy = d.pop("data_redundancy")
    data_redundancy = extractDRS(self.data_redundancy)
    # True means auto repair; False means manual repair
    self.auto_repair = self._bool(d.pop("auto_repair", "true"))
    self.overall_calculation = self._bool(d.pop("overall_calculation", "true"))
    self.lazy_recovery = self._bool(d.pop("lazy_recovery", "false"))
    self.lazy_only_available = self._bool(d.pop("lazy_only_available", "true"))
    self.recovery_bandwidth_cross_rack = int(d["recovery_bandwidth_cross_rack"])
    self.queue_disable = self._bool(d.pop("queue_disable", "true"))
    self.bandwidth_contention = d["bandwidth_contention"]
    self.node_bandwidth = int(d["node_bandwidth"])
    self.hierarchical = self._bool(d.pop("hierarchical", "false"))
    if self.hierarchical:
        # hierarchical placement needs the rack spread and intra-rack bandwidth
        self.distinct_racks = int(d.pop("distinct_racks"))
        self.recovery_bandwidth_intra_rack = \
            int(d["recovery_bandwidth_intra_rack"])
    self.parallel_repair = self._bool(d.pop("parallel_repair", "false"))
    self.availability_counts_for_recovery = self._bool(d[
        "availability_counts_for_recovery"])
    self.availability_to_durability_threshold = splitIntMethod(d["availability_to_durability_threshold"])
    self.recovery_probability = splitIntMethod(d["recovery_probability"])
    self.max_degraded_slices = float(d["max_degraded_slices"])
    self.installment_size = int(d["installment_size"])
    self.outputs = splitMethod(d["outputs"])
    # RAFI recovery is enabled exactly when detect_intervals is configured
    detect_intervals = d.pop("detect_intervals", None)
    if detect_intervals is None:
        self.rafi_recovery = False
        self.detect_intervals = detect_intervals
    else:
        self.rafi_recovery = True
        self.detect_intervals = splitFloatMethod(detect_intervals)
    self.drs_handler = getDRSHandler(data_redundancy[0], data_redundancy[1:])
    if not self.lazy_recovery:
        # eager recovery: repair as soon as one chunk of a stripe is missing
        self.recovery_threshold = self.drs_handler.n - 1
    else:
        self.recovery_threshold = int(d.pop("recovery_threshold"))
    # total slices calculated like this only without heterogeneous redundancy
    self.total_slices = int(ceil(self.total_active_storage * pow(2, 30) / (self.drs_handler.k * self.chunk_size)))
    self.chunk_repair_time, self.disk_repair_time, self.node_repair_time = self._repairTime()
def addSystemUpgrade(self, upgrade_info, end_time):
    """Schedule periodic rolling system upgrades as machine failure intervals.

    upgrade_info[0] -- upgrade style (read but unused here)
    upgrade_info[1] -- domain spec, e.g. "2_rack" or "4_machine"
    upgrade_info[2] -- frequency between rollings
    upgrade_info[3] -- interval between consecutive upgrade domains
    upgrade_info[4] -- downtime of each upgrade domain
    end_time -- end of the simulated period

    Returns the list of rolling-upgrade start times.
    NOTE(review): source was recovered from a collapsed single-line form;
    the per-domain scheduling below is placed inside the per-rolling loop
    because it reads system_upgrade_start -- confirm against the original
    layout.
    """
    style = upgrade_info[0]
    domain_infos = splitMethod(upgrade_info[1], '_')
    freq = upgrade_info[2]
    interval = upgrade_info[3]
    downtime = upgrade_info[4]
    # random phase of the first rolling within the leftover time window
    upgrade_start = round(uniform(0, float(end_time) % freq), 3)
    upgrade_start_times = []
    upgrade_times = int((end_time - upgrade_start) / freq)
    for j in xrange(1, upgrade_times + 1):
        system_upgrade_start = j * freq + upgrade_start
        upgrade_start_times.append(system_upgrade_start)
        if domain_infos[1] == "rack":
            upgrade_domain_in_racks = int(domain_infos[0])
            # getAllMachines() returns one list of machines per rack,
            # so len(machines) is the rack count here
            machines = self.distributer.getAllMachines()
            # racks = self.distributer.getAllRacks()
            loops = len(machines) / upgrade_domain_in_racks
            remainder = len(machines) % upgrade_domain_in_racks
            for i in xrange(loops):
                start_time = system_upgrade_start + (downtime + interval) * i
                upgrade_machines = machines[i * upgrade_domain_in_racks:(i + 1) * upgrade_domain_in_racks]
                for item in upgrade_machines:
                    for machine in item:
                        # [start, end, lost flag]; False = unavailable only
                        machine.addFailureInterval([start_time, start_time + downtime, False])
            if remainder != 0:
                # leftover racks that do not fill a whole upgrade domain
                start_time = system_upgrade_start + (downtime + interval) * loops
                upgrade_machines = machines[-remainder:]
                for item in upgrade_machines:
                    for machine in item:
                        machine.addFailureInterval([start_time, start_time + downtime, False])
        elif domain_infos[1] == "machine":
            # machines per rack is divisible by upgrade_domain_in_machines
            upgrade_domain_in_machines = int(domain_infos[0])
            machines_in_racks = self.distributer.getAllMachines()
            rack_count = len(machines_in_racks)
            machines_per_rack = len(machines_in_racks[0])
            loops = (rack_count * machines_per_rack) / upgrade_domain_in_machines
            remainder = (rack_count * machines_per_rack) % upgrade_domain_in_machines
            # flatten the per-rack lists into one machine list
            machines = []
            for rack in machines_in_racks:
                machines += rack
            for i in xrange(loops):
                # rack_index = i*upgrade_domain_in_machines/machines_per_rack
                # current_rack = machines[rack_index]
                start_time = system_upgrade_start + (downtime + interval) * i
                for a in xrange(upgrade_domain_in_machines):
                    machine = \
                        machines[a + i * upgrade_domain_in_machines]
                    machine.addFailureInterval([start_time, start_time + downtime, False])
            if remainder != 0:
                # current_rack = machines[-1]
                start_time = system_upgrade_start + (downtime + interval) * loops
                # upgrade domain in "machine" means remainder <= machines_per_rack
                for j in xrange(-remainder, 0):
                    machine = machines[j]
                    machine.addFailureInterval([start_time, start_time + downtime, False])
        else:
            pass
    return upgrade_start_times
def __init__(self, path=None):
    """Load simulator configuration, including optional scaling, upgrade
    and correlated-failure sections, from an INI file.

    path: configuration file path; falls back to Configuration.path.
    Raises Exception when the DEFAULT section or a required optional
    section is missing. d.pop(...) calls mutate the defaults dict, so the
    order of reads below matters.
    """
    if path is None:
        conf_path = Configuration.path
    else:
        conf_path = path
    self.conf = getConfParser(conf_path)
    try:
        d = self.conf.defaults()
    except ConfigParser.NoSectionError:
        raise Exception("No Default Section!")
    self.total_time = int(d["total_time"])
    # total active storage in PBs
    self.total_active_storage = float(d["total_active_storage"])
    self.chunk_size = int(d["chunk_size"])
    self.disk_capacity = float(d["disk_capacity"])
    # translate TB of manufacturer (10^12 bytes) into GBs (2^30 bytes)
    self.actual_disk_capacity = self.disk_capacity * pow(10, 12) / pow(
        2, 30)
    # capacity is in GB, so *1024 converts to MB before dividing by chunk size
    self.max_chunks_per_disk = floor(self.actual_disk_capacity * 1024 /
                                     self.chunk_size)
    self.disks_per_machine = int(d["disks_per_machine"])
    self.machines_per_rack = int(d["machines_per_rack"])
    self.rack_count = int(d["rack_count"])
    self.datacenters = 1
    self.xml_file_path = d.pop("xml_file_path")
    self.event_file = d.pop("event_file", None)
    # If n <= 15 in each stripe, no two chunks are on the same rack.
    self.num_chunks_diff_racks = 15
    self.data_redundancy = d["data_redundancy"]
    data_redundancy = extractDRS(self.data_redundancy)
    self.lazy_recovery = self._bool(d.pop("lazy_recovery", "false"))
    self.lazy_only_available = self._bool(
        d.pop("lazy_only_available", "true"))
    self.recovery_bandwidth_cross_rack = int(
        d["recovery_bandwidth_cross_rack"])
    self.queue_disable = self._bool(d.pop("queue_disable", "true"))
    self.bandwidth_contention = d["bandwidth_contention"]
    self.node_bandwidth = int(d["node_bandwidth"])
    self.parallel_repair = self._bool(d.pop("parallel_repair", "false"))
    self.availability_counts_for_recovery = self._bool(
        d["availability_counts_for_recovery"])
    self.availability_to_durability_threshold = splitIntMethod(
        d["availability_to_durability_threshold"])
    self.recovery_probability = splitIntMethod(d["recovery_probability"])
    self.max_degraded_slices = float(d["max_degraded_slices"])
    self.installment_size = int(d["installment_size"])
    self.outputs = splitMethod(d["outputs"])
    self.drs_handler = getDRSHandler(data_redundancy[0],
                                     data_redundancy[1:])
    if not self.lazy_recovery:
        # eager recovery: repair as soon as one chunk of a stripe is missing
        self.recovery_threshold = self.drs_handler.n - 1
    else:
        self.recovery_threshold = int(d.pop("recovery_threshold"))
    # flat or hier, if hier, r is the distinct_racks
    self.hier = self._bool(d.pop("hierarchical", "false"))
    if self.hier:
        self.r = int(d["distinct_racks"])
    else:
        self.r = self.drs_handler.n
    self.system_scaling = self._bool(d.pop("system_scaling", "false"))
    if self.system_scaling:
        sections = self._getSections("System Scaling")
        if sections == []:
            raise Exception(
                "No System Scaling section in configuration file")
        self.system_scaling_infos = []
        for section in sections:
            self.system_scaling_infos.append(
                self.parserScalingSettings(section))
    self.system_upgrade = self._bool(d.pop("system_upgrade", "false"))
    self.upgrade_ts = []
    self.failure_generator = None
    self.lse_generator = None
    if self.system_upgrade:
        sections = self._getSections("System Upgrade")
        if sections == []:
            raise Exception(
                "No System Upgrade section in configuration file")
        self.system_upgrade_infos = []
        for section in sections:
            upgrade_info = self.parserUpgradeSettings(section)
            self.system_upgrade_infos.append(upgrade_info)
            # upgrade_info[0] is the upgrade style; style 1 apparently
            # needs no precomputed timestamps
            if upgrade_info[0] != 1:
                times = self.total_time / upgrade_info[2]
                self.upgrade_ts = [
                    upgrade_info[2] * i for i in xrange(1, times + 1)
                ]
            # upgrade with new disks(like SSD) needs new failure generator and lse generator
            if upgrade_info[0] == 3:
                disk_failure_generator = self.conf.get(
                    section, "disk_failure_generator")
                self.failure_generator = returnEventGenerator(
                    "failureGenerator", disk_failure_generator)
                latent_error_generator = self.conf.get(
                    section, "latent_error_generator")
                self.lse_generator = returnEventGenerator(
                    "latentErrorGenerator", latent_error_generator)
    self.correlated_failures = self._bool(
        d.pop("correlated_failures", "false"))
    if self.correlated_failures:
        sections = self._getSections("Correlated Failures")
        if sections == []:
            raise Exception(
                "No Correlated Failures section in configuration file")
        self.correlated_failures_infos = []
        for section in sections:
            self.correlated_failures_infos.append(
                self.parserCorrelatedSetting(section))
    self.disk_repair_time, self.node_repair_time = self.comRepairTime()
    # total slices calculated like this only without heterogeneous redundancy
    self.total_slices = int(
        ceil(self.total_active_storage * pow(2, 30) /
             (self.drs_handler.k * self.chunk_size)))