def analyze_failures(self, c, take, failure_domain):
    """Report the worst over-fill per type when one failure-domain bucket is lost.

    For every bucket of type ``failure_domain`` found under ``take``, a deep
    copy of the crushmap is made, that bucket is filtered out, and the
    simulation is run again.  The per-type maximum of the
    ``'~over/under filled %~'`` column over all those runs is returned in a
    column named ``'~over filled %~'``.

    Args:
        c: parsed crush object; must provide ``find_bucket``,
            ``collect_buckets_by_type`` and ``get_crushmap``.
        take: name of the bucket the rule starts from.
        failure_domain: bucket type whose members are removed one at a time.

    Returns:
        A DataFrame indexed by ``'~type~'`` with the single column
        ``'~over filled %~'``, or ``None`` when ``failure_domain`` is ``0``,
        when there are not more buckets than ``self.args.replication_count``,
        or when every simulated failure ended in ``BadMapping``.
    """
    if failure_domain == 0:  # failure domain == device is a border case
        return None
    root = c.find_bucket(take)
    available_buckets = c.collect_buckets_by_type([root], failure_domain)
    if len(available_buckets) <= self.args.replication_count:
        log.error("there are not enough " + failure_domain +
                  " to sustain failure")
        return None

    frames = []
    for may_fail in available_buckets:
        f = Crush(verbose=self.args.debug,
                  backward_compatibility=self.args.backward_compatibility)
        f.crushmap = copy.deepcopy(c.get_crushmap())
        root = f.find_bucket(take)
        # Look the name up once instead of on every node visited by filter().
        failed_name = may_fail.get('name')
        f.filter(lambda x: x.get('name') != failed_name, root)
        f.parse(f.crushmap)
        try:
            a = self.run_simulation(f, take, failure_domain)
        except BadMapping:
            log.error("mapping failed when removing {}".format(may_fail))
            continue
        a['~over filled %~'] = a['~over/under filled %~']
        frames.append(a[['~type~', '~over filled %~']])

    if not frames:
        # Every simulated failure produced a bad mapping: there is nothing
        # to aggregate, and indexing an empty frame by '~type~' would raise
        # KeyError.
        return None

    # The maximum of per-run maxima equals the maximum over all rows, so a
    # single concat/groupby replaces the incremental one; grouping on
    # '~type~' already leaves it as the index.
    return pd.concat(frames).groupby(['~type~']).max()
def test_filter_real(self):
    """Filter one named bucket out of a crushmap loaded from a fixture file.

    Before filtering, the 'optimize' choose_args holds three entries and
    the one for bucket -1 has seven weights in its first weight set.  After
    filtering out the target bucket, two entries remain, the one for
    bucket -1 has six weights, and the bucket can no longer be found.
    """
    target = 'cloud6-1429'
    crush = Crush()
    crush.parse('tests/test_crush_filter.json')
    crushmap = crush.get_crushmap()

    # Sort so that positions are predictable for the checks below.
    before = sorted(crushmap['choose_args']['optimize'],
                    key=lambda entry: entry['bucket_id'])
    assert len(before) == 3
    last_before = before[2]
    assert last_before['bucket_id'] == -1
    assert len(last_before['weight_set'][0]) == 7

    found = crush.find_bucket(target)
    assert found['name'] == target

    def keep(node):
        return node.get('name') != target

    crush.filter(keep, crushmap['trees'][0])

    # Read back unsorted, exactly as stored in the crushmap.
    after = crushmap['choose_args']['optimize']
    assert len(after) == 2
    last_after = after[1]
    assert last_after['bucket_id'] == -1
    assert len(last_after['weight_set'][0]) == 6
    assert crush.find_bucket(target) is None
def run_optimize(self, p, rule_name, crushmap, with_positions=True):
    """Optimize ``crushmap`` level by level and print before/after reports.

    The crushmap is parsed, a simulation is run and reported, then every
    level of the hierarchy below the rule's ``take`` bucket is handed to
    the module-level worker ``o`` through a process pool.  The non-empty
    results are appended to ``crushmap['choose_args']['optimize']`` and the
    simulation is run and reported a second time.

    Args:
        p: list of command line arguments for the ``analyze`` sub-command.
            It is copied, not modified.
        rule_name: name of the crush rule to analyze.
        crushmap: crushmap (anything ``Crush.parse`` accepts).
        with_positions: forwarded unchanged to the worker ``o``.

    Raises:
        ValueError: if the first simulation reports an overweight item.
    """
    pd.set_option('display.max_rows', None)
    pd.set_option('display.width', 160)

    # Work on a copy so the caller's list does not silently grow.
    argv = list(p) + ['--rule', rule_name]
    analyzer = Ceph().constructor(['analyze'] + argv)

    c = Crush(backward_compatibility=True)
    c.parse(crushmap)
    (take, failure_domain) = c.rule_get_take_failure_domain(rule_name)
    crushmap = c.get_crushmap()
    crushmap['choose_args'] = {
        "optimize": [],
    }

    d = analyzer.run_simulation(c, take, failure_domain)
    if d['~overweight~'].any():
        raise ValueError(
            'no way to optimize when there is an overweight item')
    print(str(d))
    print(analyzer._format_report(d, 'device'))
    print(analyzer._format_report(d, failure_domain))
    print(analyzer.analyze_failures(c, take, failure_domain))

    argv = argv + ['--choose-args', 'optimize']

    pool = Pool()
    try:
        children = [c.find_bucket(take)]
        while children:
            jobs = [(argv, crushmap, item, with_positions)
                    for item in children]
            results = pool.map(o, jobs)
            # Keep only the truthy results returned by the workers.
            crushmap['choose_args']['optimize'].extend(
                result for result in results if result)
            # Descend one level: gather the children of every bucket.
            # NOTE: nothing here checks that they all share the same type.
            next_level = []
            for item in children:
                next_level.extend(item.get('children', []))
            children = next_level
    finally:
        # Always release the worker processes, even if a worker failed.
        pool.close()
        pool.join()

    pprint.pprint(crushmap)
    c.parse(crushmap)
    analyzer = Ceph().constructor(['analyze'] + argv)
    d = analyzer.run_simulation(c, take, failure_domain)
    print(analyzer._format_report(d, 'device'))
    print(analyzer._format_report(d, failure_domain))
    print(analyzer.analyze_failures(c, take, failure_domain))
def optimize(self, crushmap):
    """Optimize the weights of ``crushmap`` one hierarchy level at a time.

    Starting from the ``take`` bucket of rule ``self.args.rule``, every
    bucket of the current level is passed to ``top_optimize`` (in a process
    pool when ``self.args.multithread`` is set).  Each non-``None`` result
    is a ``(count, choose_arg)`` pair: ``count`` is added to the running
    total and ``choose_arg`` is stored under the ``self.args.choose_args``
    name.  The walk stops when there are no more children or, if
    ``self.args.step`` is set, as soon as the running total exceeds it.

    Args:
        crushmap: crushmap (anything ``Crush.parse`` accepts).

    Returns:
        A tuple ``(total_count, crushmap)`` with the accumulated count and
        the updated crushmap.
    """
    c = Crush(backward_compatibility=self.args.backward_compatibility)
    c.parse(crushmap)
    crushmap = c.get_crushmap()
    # The crushmap is parsed again after each addition made below.
    if 'choose_args' not in crushmap:
        crushmap['choose_args'] = {}
        c.parse(crushmap)
    if self.args.choose_args not in crushmap['choose_args']:
        crushmap['choose_args'][self.args.choose_args] = []
        c.parse(crushmap)
    (take, _failure_domain) = c.rule_get_take_failure_domain(self.args.rule)
    parser = analyze.Analyze.get_parser()
    self.main.hook_analyze_args(parser)
    p = self.main.get_trimmed_argv(parser, self.args)
    # The constructed object is not used here; the call is kept in case
    # building it has side effects such as argument validation -- TODO confirm.
    self.main.clone().constructor(['analyze'] + p)

    pool = None
    if self.args.multithread:
        from multiprocessing import Pool
        pool = Pool()
    try:
        children = [c.find_bucket(take)]
        total_count = 0
        over_step = False
        n = self.main.value_name()
        while not over_step and children:
            jobs = [(self, p, c.get_crushmap(), item) for item in children]
            if pool is not None:
                results = pool.map(top_optimize, jobs)
            else:
                results = [top_optimize(job) for job in jobs]
            for child, result in zip(children, results):
                if result is None:
                    continue
                (count, choose_arg) = result
                total_count += count
                c.update_choose_args(self.args.choose_args, [choose_arg])
                log.info(child['name'] + " weights updated with " +
                         str(choose_arg))
                if self.args.step and count > 0:
                    log.warning(child['name'] + " will swap " +
                                str(count) + " " + n)
                over_step = self.args.step and total_count > self.args.step
                if over_step:
                    break
            # Descend one level: gather the children of every bucket.
            # NOTE: nothing here checks that they all share the same type.
            next_level = []
            for item in children:
                next_level.extend(item.get('children', []))
            children = next_level
    finally:
        # Always release the worker processes, even if a worker failed.
        if pool is not None:
            pool.close()
            pool.join()
    return (total_count, c.get_crushmap())