def test_remapping(metasync, opts):
    replication = 2
    config = [(0, 2), (1, 4), (2, 4), (3, 2)]
    hspace = 20
    detmap = DetMap2(config, hspace, replication)

    N = 50
    lst = []
    for _ in range(100):
        randstr = ''.join(random.choice(string.letters + string.digits)
                          for _ in range(N))
        hashid = util.sha1(randstr)
        lst.append(hashid)

    # raising the replication factor only adds copies:
    # no blob should be removed from any node
    detmap.reconfig(config, 3)
    assert len(detmap.mapinfo) == 2
    added, removed = detmap.get_remapping(lst)
    for i in removed:
        assert len(removed[i]) == 0

    # dropping node 3 should remap exactly the blobs
    # that were previously mapped to it
    import copy
    detmap = DetMap2(config, hspace, replication)
    config = copy.copy(config)
    config.pop()
    lst3 = []
    for hv in lst:
        if 3 in detmap.get_mapping(hv):
            lst3.append(hv)
    detmap.reconfig(config)
    added, removed = detmap.get_remapping(lst)
    assert len(removed[3]) == len(lst3)

def test_map_pack(metasync, opts):
    config = [(0, 2), (1, 10), (2, 4), (3, 2)]
    hspace = 100
    replication = 2
    N = 50

    detmap = DetMap2(config, hspace, replication)
    detmap2 = DetMap2(config, hspace, replication)
    detmap2.pack()

    # a packed map must answer lookups identically to the unpacked one
    for _ in range(100):
        randstr = ''.join(random.choice(string.letters + string.digits)
                          for _ in range(N))
        hashid = util.sha1(randstr)
        assert detmap.get_mapping(hashid) == detmap2.get_mapping(hashid)

def test_mapping_dist(metasync, opts):
    mapping = [("dropbox", 2), ("google", 15), ("box", 10),
               ("onedrive", 7), ("baidu", 2048)]
    mapping = map(lambda x: (util.md5(x[0]) % 10000, x[1]), mapping)
    print(mapping)

    hspace = (2 + 15 + 10 + 7 + 2048) * 5

    objs = []
    with open("result/linux_objs.txt") as f:
        for line in f:
            sp = line.strip().split("\t")
            hv = sp[0]
            size = int(sp[1])
            objs.append((hv, size))

    for replication in range(1, 4):
        detmap = DetMap2(mapping, hspace, replication)
        sizes = {}
        counts = {}
        for srv, sz in mapping:
            sizes[srv] = 0
            counts[srv] = 0
        for obj in objs:
            hv = obj[0]
            size = obj[1]
            lst = detmap.get_mapping(hv)
            for srv in lst:
                counts[srv] += 1
                sizes[srv] += size
        print replication,
        for srv, sz in mapping:
            print "%d/%d" % (counts[srv], sizes[srv]),
        print

def _load(self):
    if not self.check_sanity():
        return
    if not os.path.exists(AUTH_DIR):
        os.mkdir(AUTH_DIR)

    # load config
    self.config = util.load_config(self.path_conf)
    self.namespace = self.config.get("core", "namespace")
    self.clientid = self.config.get("core", "clientid")

    # load services from config
    self.srvmap = {}
    for tok in self.config.get("backend", "services").split(","):
        srv = services.factory(tok)
        self.srvmap[srv.sid()] = srv

    self.nreplicas = int(self.config.get("backend", "nreplicas"))

    nthreads = self.options.nthreads if self.options is not None else 2
    self.scheduler = Scheduler(self.services, (nthreads + 1) * len(self.srvmap))

    # load translator pipe
    if self.is_encypted():
        self.translators.append(translators.TrEncrypt(self))

    # TODO. for integrity option
    # if self.is_signed():
    #     self.translators.append(TrSigned(self))

    beg = time.time()
    if os.path.exists(self.get_path("mapping.pcl")):
        with open(self.get_path("mapping.pcl")) as f:
            self.mapping = pickle.load(f)
    else:
        mapconfig = []
        for srv in self.services:
            mapconfig.append((srv.sid(), srv.info_storage() / GB))
        hspacesum = sum(map(lambda x: x[1], mapconfig))
        hspace = max(hspacesum + 1, 1024)
        self.mapping = DetMap2(mapconfig, hspace=hspace, replica=self.nreplicas)
        self.mapping.pack()
        with open(self.get_path("mapping.pcl"), "w") as f:
            pickle.dump(self.mapping, f)
    end = time.time()
    dbg.time("mapping init %s" % (end - beg))
    dbg.dbg("head: %s", self.get_head_name())

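# Hedged sketch (not part of metasync): the design notes in test_mapping2
# below describe the mapping as deterministic in (mapconfig, hspace,
# replica), so the pickled mapping.pcl above is purely a cache and a
# mapping rebuilt from the same inputs should agree with it.
# _check_mapping_cache is an illustrative name, not an existing API.
def _check_mapping_cache(metasync, hv):
    mapconfig = [(srv.sid(), srv.info_storage() / GB) for srv in metasync.services]
    hspace = max(sum(c for _, c in mapconfig) + 1, 1024)
    rebuilt = DetMap2(mapconfig, hspace=hspace, replica=metasync.nreplicas)
    # the rebuilt map should place hv on the same set of services
    assert rebuilt.get_mapping(hv) == metasync.mapping.get_mapping(hv)
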
def test_mapping_fairness(metasync, opts):
    "test the fairness of the mapping scheme"

    import string
    import random

    def evaluate(count, config):
        # sum of squared deviations of each node's per-GB load
        # from the global ideal N/C
        N = sum(count)
        C = sum(map(lambda x: x[1], config))
        score = 0.0
        for srv in config:
            score += (1.0 * count[srv[0]] / srv[1] - 1.0 * N / C) ** 2
        return score

    config = [(0, 2), (1, 7), (2, 10), (3, 15)]
    nspace = sum(map(lambda x: x[1], config))
    result = [['replication', 'factor', 'result', 'fairness', 'score']]
    N = 50
    random.seed(0)
    for replication in range(1, 4):
        for factor in range(100, 1001, 100):
            hspace = factor * nspace
            detmap = DetMap2(config, hspace, replication)
            count = [0, 0, 0, 0]
            for _ in range(5000):
                randstr = ''.join(random.choice(string.letters + string.digits)
                                  for _ in range(N))
                hashid = util.sha1(randstr)
                lst = detmap.get_mapping(hashid)
                for i in lst:
                    count[i] += 1
            fairness = [1.0 * count[i] / config[i][1] for i in range(4)]
            score = evaluate(count, config)
            row = [replication, factor, count, fairness, score]
            result.append(row)

    for row in result:
        for e in row:
            print "%s\t" % e,
        print

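# A worked example of the fairness score computed by evaluate() above,
# with illustrative numbers (not from an actual run): each node's load
# per unit of storage is compared against the global ideal N/C, so a
# placement exactly proportional to capacity scores 0.
def _fairness_score_example():
    config = [(0, 2), (1, 7), (2, 10), (3, 15)]   # same capacities as the test
    count = [200, 700, 1000, 1500]                # hypothetical per-node counts
    N, C = sum(count), sum(c for _, c in config)  # N/C == 3400/34 == 100.0
    score = sum((1.0 * count[s] / cap - 1.0 * N / C) ** 2 for s, cap in config)
    assert score == 0.0   # every count[i]/cap[i] equals 100.0: perfectly fair
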
def test_mapping2(metasync, opts):
    "test a new mapping scheme to minimize rebalancing on node failure"

    from itertools import permutations

    # goal
    #   1. non-uniformly locate blobs, approximately reflecting the
    #      storage size of each node
    #   2. minimize realignment on a node failure
    #
    # design (a minimal standalone sketch follows this test)
    #   0. node -> (node, storage)
    #      (e.g., (1,0), (1,1) if node 1 has 2 GB of storage)
    #   1. fixed hspace, where h(blob) % hspace = index
    #      (hspace is any large number, NOT depending on len(nodes))
    #   2. detmap[index] -> a group of nodes
    #      (a group includes all nodes, but in a different order; see 3)
    #   3. order nodes within a group by hash(index, node)
    #      (so the order is deterministic)
    #   4. in each group, pick the first #replication nodes
    #
    # on a node failure, a node change, or a change of the replication
    # factor, only blobs on the old/new node need to be re-balanced

    def uniq(lst, n):
        rtn = []
        for (i, _) in lst:
            if i in rtn:
                continue
            rtn.append(i)
            if len(rtn) == n:
                break
        return rtn

    replication = 2
    config = [(0, 2), (1, 4), (2, 4), (3, 2)]
    hspace = 20

    detmap = DetMap2(config, hspace, replication)
    stats = [0] * len(config)
    for (k, v) in enumerate(detmap.mapinfo[0].detmap):
        # count each blob's first #replication *distinct* nodes;
        # counting raw v[i][0] entries would double-count weighted nodes
        for i in uniq(v, replication):
            stats[i] += 1
        if opts.slow:
            dbg.info("%-2s: [" % k)
            for (n, priority) in v:
                dbg.info(" %s: %s" % (n, priority))
            dbg.info("]")
        else:
            dbg.info("%-2s: %s -> %s"
                     % (k, [e[0] for e in v], detmap.get_mapping(k)))

    # does the placement approximately reflect storage size?
    for (i, k) in enumerate(stats):
        dbg.info("%s (%s GB) -> #%s" % (i, config[i][1], k))

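# A minimal standalone sketch of the scheme described in the design notes
# above, assuming sha1 as the group-ordering hash; build_detmap/lookup are
# illustrative names, not DetMap2's actual API.
import hashlib

def build_detmap(config, hspace):
    # expand each node by its storage weight: (1, 4) yields virtual
    # nodes (1, 0) .. (1, 3), so bigger nodes lead more groups
    vnodes = [(node, i) for (node, storage) in config for i in range(storage)]
    detmap = []
    for index in range(hspace):
        # order the whole group deterministically by hash(index, vnode)
        detmap.append(sorted(
            vnodes,
            key=lambda vn: hashlib.sha1(
                ("%s.%s.%s" % (index, vn[0], vn[1])).encode()).hexdigest()))
    return detmap

def lookup(detmap, hspace, hv, replication):
    # hv: integer hash of a blob; pick the first #replication
    # distinct nodes in its group
    nodes = []
    for (node, _) in detmap[hv % hspace]:
        if node not in nodes:
            nodes.append(node)
        if len(nodes) == replication:
            break
    return nodes

# usage: lookup(build_detmap([(0, 2), (1, 4)], 20), 20, 7, 2) returns the
# two nodes holding the blob whose hash is 7; removing a node from config
# only shifts the blobs whose groups it led, which is the remapping
# property test_remapping checks above.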