# Example 1
def test_remapping(metasync, opts):
    """Exercise DetMap2.reconfig()/get_remapping() across config changes."""
    replication = 2
    config = [(0, 2), (1, 4), (2, 4), (3, 2)]
    hspace = 20
    detmap = DetMap2(config, hspace, replication)

    # generate 100 random sha1 hash ids
    strlen = 50
    hashids = []
    for _ in range(100):
        blob = ''.join(
            random.choice(string.letters + string.digits)
            for _ in range(strlen))
        hashids.append(util.sha1(blob))

    # raising replication 2 -> 3 keeps a second mapinfo generation
    # but must not evict any blob from its current node
    detmap.reconfig(config, 3)
    assert len(detmap.mapinfo) == 2
    added, removed = detmap.get_remapping(hashids)
    for node in removed:
        assert len(removed[node]) == 0

    # dropping the last node must evict exactly the blobs mapped to it
    import copy
    detmap = DetMap2(config, hspace, replication)
    config = copy.copy(config)
    config.pop()
    on_node3 = [hv for hv in hashids if 3 in detmap.get_mapping(hv)]
    detmap.reconfig(config)
    added, removed = detmap.get_remapping(hashids)
    assert len(removed[3]) == len(on_node3)
# Example 2
def test_map_pack(metasync, opts):
    """A packed DetMap2 must resolve mappings identically to an unpacked one."""
    config = [(0, 2), (1, 10), (2, 4), (3, 2)]
    hspace = 100
    replication = 2
    strlen = 50

    plain = DetMap2(config, hspace, replication)
    packed = DetMap2(config, hspace, replication)
    packed.pack()

    # compare lookups on 100 random sha1 ids
    for _ in range(100):
        blob = ''.join(
            random.choice(string.letters + string.digits)
            for _ in range(strlen))
        hashid = util.sha1(blob)
        assert plain.get_mapping(hashid) == packed.get_mapping(hashid)
# Example 3
def test_mapping_dist(metasync, opts):
    mapping = [("dropbox", 2), ("google", 15), ("box", 10), ("onedrive", 7),
               ("baidu", 2048)]
    mapping = map(lambda x: (util.md5(x[0]) % 10000, x[1]), mapping)
    print(mapping)
    hspace = (2 + 15 + 10 + 7 + 2048) * 5
    objs = []
    with open("result/linux_objs.txt") as f:
        for line in f:
            sp = line.strip().split("\t")
            hv = sp[0]
            size = int(sp[1])
            objs.append((hv, size))

    for replication in range(1, 4):
        detmap = DetMap2(mapping, hspace, replication)
        sizes = {}
        counts = {}
        for srv, sz in mapping:
            sizes[srv] = 0
            counts[srv] = 0

        for obj in objs:
            hv = obj[0]
            size = obj[1]
            lst = detmap.get_mapping(hv)
            for srv in lst:
                counts[srv] += 1
                sizes[srv] += size
        print replication,
        for srv, sz in mapping:
            print "%d/%d" % (counts[srv], sizes[srv]),

        print
# Example 4
    def _load(self):
        """Initialize runtime state from the on-disk configuration.

        Loads the section/option style config, instantiates backend
        services, sizes the scheduler, sets up the translator pipeline,
        and restores (or builds and pickles) the deterministic
        blob->service mapping. Returns early if check_sanity() fails.
        """
        if not self.check_sanity():
            return

        # ensure the auth-token directory exists before services use it
        if (not os.path.exists(AUTH_DIR)): os.mkdir(AUTH_DIR)

        # load config
        self.config = util.load_config(self.path_conf)
        self.namespace = self.config.get("core", "namespace")
        self.clientid = self.config.get("core", "clientid")

        # load services from config; "backend/services" is a comma list
        # of tokens, each resolved via the service factory and keyed by sid
        self.srvmap = {}
        for tok in self.config.get("backend", "services").split(","):
            srv = services.factory(tok)
            self.srvmap[srv.sid()] = srv

        self.nreplicas = int(self.config.get("backend", "nreplicas"))

        # scheduler pool scaled by thread count and number of services
        # (defaults to 2 threads when no options object is present)
        nthreads = self.options.nthreads if self.options is not None else 2
        self.scheduler = Scheduler(self.services,
                                   (nthreads + 1) * len(self.srvmap))

        # load translator pipe
        if self.is_encypted():
            self.translators.append(translators.TrEncrypt(self))

        # TODO. for integrity option
        # if self.is_signed():
        #     self.translators.append(TrSigned(self))

        beg = time.time()
        if (os.path.exists(self.get_path("mapping.pcl"))):
            # reuse the pickled mapping cache; rebuilding is the slow path
            with open(self.get_path("mapping.pcl")) as f:
                self.mapping = pickle.load(f)
        else:
            # weight each service by its storage quota (in GB)
            mapconfig = []
            for srv in self.services:
                mapconfig.append((srv.sid(), srv.info_storage() / GB))
            hspacesum = sum(map(lambda x: x[1], mapconfig))
            # hash space strictly exceeds the total weight, floor 1024
            hspace = max(hspacesum + 1, 1024)
            self.mapping = DetMap2(mapconfig,
                                   hspace=hspace,
                                   replica=self.nreplicas)
            self.mapping.pack()
            # NOTE(review): text-mode pickle I/O — Python 2 style; would
            # need "rb"/"wb" under Python 3
            with open(self.get_path("mapping.pcl"), "w") as f:
                pickle.dump(self.mapping, f)
        end = time.time()
        dbg.time("mapping init %s" % (end - beg))
        dbg.dbg("head: %s", self.get_head_name())
# Example 5
def test_mapping_fairness(metasync, opts):
    "test the fairness of mapping scheme"

    import string
    import random

    def evaluate(count, config):
        N = sum(count)
        C = sum(map(lambda x: x[1], config))
        score = 0.0
        for srv in config:
            score += (1.0 * count[srv[0]] / srv[1] - 1.0 * N / C)**2
        return score

    config = [(0, 2), (1, 7), (2, 10), (3, 15)]
    nspace = sum(map(lambda x: x[1], config))
    result = [['replication', 'factor', 'result', 'fairness', 'score']]
    N = 50
    random.seed(0)

    for replication in range(1, 4):
        for factor in range(100, 1001, 100):

            hspace = factor * nspace
            detmap = DetMap2(config, hspace, replication)
            count = [0, 0, 0, 0]

            for _ in range(5000):
                randstr = ''.join(
                    random.choice(string.letters + string.digits)
                    for _ in range(N))
                hashid = util.sha1(randstr)

                lst = detmap.get_mapping(hashid)
                for i in lst:
                    count[i] += 1
            fairness = [1.0 * count[i] / config[i][1] for i in range(4)]
            score = evaluate(count, config)
            row = [replication, factor, count, fairness, score]
            result.append(row)

    for row in result:
        for e in row:
            print "%s\t" % e,
        print
# Example 6
def test_mapping2(metasync, opts):
    """Inspect DetMap2: weighted placement with minimal rebalancing on node failure."""

    from itertools import permutations

    # goal
    # 1. non-uniformly locate blobs, roughly proportional to node storage
    # 2. minimize realignment when a node fails
    #
    # design
    # 0. node -> (node, storage)
    #    (e.g., (1,0), (1,1) if node 1 has 2G storage)
    # 1. fixed hspace, where h(blob) % hspace = index
    #    (hspace is any large number, NOT dependent on len(nodes))
    # 2. detmap[index] -> a group of nodes
    #    (a group includes all nodes, but in varying order, see 3)
    # 3. nodes within a group are ordered by hash(index, node)
    #    (so the order is deterministic)
    # 4. in each group, pick the first #replication nodes
    #
    # failure / node change / replication change
    #   => in all of these situations, only blobs on the old/new node
    #      need re-balancing

    def uniq(lst, n):
        # first n distinct node ids from (node, priority) pairs,
        # preserving priority order
        seen = []
        for node, _ in lst:
            if node in seen:
                continue
            seen.append(node)
            if len(seen) == n:
                break
        return seen

    replication = 2
    config = [(0, 2), (1, 4), (2, 4), (3, 2)]
    hspace = 20
    detmap = DetMap2(config, hspace, replication)

    stats = [0] * len(config)
    for (idx, group) in enumerate(detmap.mapinfo[0].detmap):
        # count each distinct node among the first #replication choices
        # (raw per-slot counting would double-count duplicated nodes)
        for node in uniq(group, replication):
            stats[node] += 1
        if opts.slow:
            dbg.info("%-2s: [" % idx)
            for (n, priority) in group:
                dbg.info("  %s: %s" % (n, priority))
            dbg.info("]")
        else:
            dbg.info("%-2s: %s -> %s" \
                       % (idx, [e[0] for e in group], detmap.get_mapping(idx)))

    # does the placement roughly track each node's storage weight?
    for (node, hits) in enumerate(stats):
        dbg.info("%s (%s GB) -> #%s" % (node, config[node][1], hits))