# Example #1
class Evaluator:
    """Estimate average client latency and inter-datacenter traffic for
    the request logs that fall in a [start_time, end_time] window."""

    def __init__(self, start_time, end_time):
        self.set_time(start_time, end_time)
        self.aggregator = Aggregator()

    def set_time(self, start_time, end_time):
        """Set the evaluation window (start/end timestamps)."""
        self.start_time = start_time
        self.end_time = end_time

    # read_logs and moving_logs are always omitted except for testing
    def evaluate(self, read_logs=None, moving_logs=None):
        """Return (average_latency, inter_datacenter_traffic).

        read_logs / moving_logs: iterables of log tuples
        (timestamp, uuid, source, source_uuid, dest, req_type, status,
        response_size); fetched from the aggregator when None.
        """
        # collect server logs
        if read_logs is None:
            read_logs = self.aggregator.get_read_log_entries(
                self.start_time, self.end_time)
        if moving_logs is None:
            moving_logs = self.aggregator.get_moving_log_entries(
                self.start_time, self.end_time)
        # calculate the average latency
        latency_sum = 0
        request_count = 0
        ip_cache = None  # created lazily: only needed when read_logs is non-empty
        for log in read_logs:
            timestamp, uuid, source, source_uuid, dest, req_type, status, response_size = log
            if ip_cache is None:
                ip_cache = ip_location_cache()
            client_loc = ip_cache.get_lat_lon_from_ip(source)
            server_loc = ip_cache.get_lat_lon_from_ip(dest)
            distance = util.get_distance(client_loc, server_loc)
            # latency model: distance scaled by a fixed unit — presumably
            # distance-per-millisecond; TODO confirm against util.get_distance
            unit = 1000.0
            latency = distance / unit
            request_importance = 1  # every request weighted uniformly
            latency_sum += latency * request_importance
            request_count += request_importance
        # BUG FIX: guard against ZeroDivisionError when there are no read logs
        average_latency = latency_sum / request_count if request_count else 0.0

        inter_datacenter_traffic = 0
        for log in moving_logs:
            timestamp, uuid, source, source_uuid, dest, req_type, status, response_size = log
            # treat all files as uniform size
            inter_datacenter_traffic += 1
        # display latency, cost, etc
        return average_latency, inter_datacenter_traffic
# Example #2
class GreedyReplication:
    """Greedy replica-placement policy.

    Pulls read logs from the Aggregator, tallies per-client demand for each
    content uuid, and greedily adds replicas whenever the current replica
    set could not absorb a small simulated increase in load.
    """

    def __init__(self):
        self.aggregator = Aggregator()  # to retrieve server logs
        self.client_set = set([])  # [client_ip, ]
        self.server_set = set(util.retrieve_server_list())  # [server_ip, ]
        self.content_set = set([])  # [uuid, ]
        self.access_map = {}  # {uuid: {client_ip: num_request}}
        self.replica_map = {}  # {uuid: {server_ip: num_replica}}
        self.last_timestamp = 0  # the timestamp of last update
        # how many requests a single replica is assumed to serve
        self.requests_per_replica = 3
        self.uuid_to_server = None

    # update client_set, server_set, content_set, access_info
    # and replication status
    # call this function before running greedy algorithm
    def update(self):
        # clear data
        self.content_set = set([])
        self.access_map = {}
        self.replica_map = {}
        # update content_set, replica_map from what the servers hold now
        for server in self.server_set:
            file_list = util.get_file_list_on_server(server)
            for file_uuid in file_list:
                self.content_set.add(file_uuid)
                if file_uuid not in self.replica_map:
                    self.replica_map[file_uuid] = {}
                if server not in self.replica_map[file_uuid]:
                    self.replica_map[file_uuid][server] = 0
                self.replica_map[file_uuid][server] += 1

        current_timestamp = int(time.time())
        logs = self.aggregator.get_read_log_entries(self.last_timestamp,
                                                    current_timestamp)
        # use recently generated logs to update the demand map
        for log in logs:
            timestamp, uuid, source, source_uuid, dest, req_type, status, response_size = log
            # skip content that is no longer hosted anywhere
            if uuid not in self.content_set:
                continue
            if uuid not in self.access_map:
                self.access_map[uuid] = {}
            self.client_set.add(source)
            if req_type == 'READ':
                if source not in self.access_map[uuid]:
                    self.access_map[uuid][source] = 0
                self.access_map[uuid][source] += 1
        self.last_timestamp = current_timestamp

    def run_replication(self):
        """Refresh state, then add replicas if demand headroom is too small."""
        self.update()
        # BUG FIX: was `self.requests_per_replica / 10`, which truncates to 0
        # under Python 2 integer division and made the probe delta a no-op.
        request_delta = self.requests_per_replica / 10.0
        replica_delta = 1
        if not self.enough_replica_on_increase(request_delta):
            self.add_replica(request_delta, replica_delta)
        # currently we don't remove any replica
        # else:
        # remove_replica()

    # test whether current replicas can handle more requests
    #
    # delta: specify the amount of request increased every time
    def enough_replica_on_increase(self, delta):
        for c in self.content_set:
            if c in self.access_map:
                for a in self.access_map[c]:
                    # add a small amount of requests for content c from client a
                    self.access_map[c][a] += delta
                    # test whether current replicas can handle that much request
                    is_enough = self.enough_replica()
                    # back tracking
                    self.access_map[c][a] -= delta
                    if not is_enough:
                        return False
        return True

    def add_replica(self, request_delta, replica_delta):
        """Add one replica to relieve starved (client, content) pairs.

        request_delta: simulated extra demand used for probing
        replica_delta: number of replicas tentatively added per probe
        """
        starved = []  # (client_ip, uuid) pairs that cannot take more load
        for c in self.content_set:
            if c in self.access_map:
                for a in self.access_map[c]:
                    # add a small amount of requests for content c from client a
                    self.access_map[c][a] += request_delta
                    # test whether current replicas can handle that much request
                    if not self.enough_replica():
                        starved.append((a, c))
                    # back tracking
                    self.access_map[c][a] -= request_delta
        max_satisfied_num = 0
        best_c = None
        best_s = None
        # find the server s to replicate content c so that
        # maximum number of starved clients can be satisfied
        # NOTE(review): satisfied_num is reset for every (a, c, s) probe, so it
        # is at most 1 and the first workable (c, s) wins -- the "maximum"
        # search is effectively first-fit. Kept as-is to preserve behavior.
        for a, c in starved:
            for s in self.server_set:
                satisfied_num = 0
                self.access_map[c][a] += request_delta
                if s not in self.replica_map[c]:
                    self.replica_map[c][s] = 0
                self.replica_map[c][s] += replica_delta
                if self.enough_replica():
                    satisfied_num += 1
                # back track the probe
                self.access_map[c][a] -= request_delta
                self.replica_map[c][s] -= replica_delta
                if self.replica_map[c][s] == 0:
                    self.replica_map[c].pop(s)
                if satisfied_num > max_satisfied_num:
                    max_satisfied_num = satisfied_num
                    best_c = c
                    best_s = s
        if max_satisfied_num > 0:
            # replicate from any server that already holds best_c
            source = next(iter(self.replica_map[best_c]))
            if source == best_s:
                # can't hold more than 1 replica, replicate to a random other server
                best_s = random.sample(self.server_set - set([source]), 1)[0]
            self.replicate(best_c, source, best_s)
        else:
            # no single placement helps: replicate everything everywhere
            print('replicate to all servers')
            for content in self.content_set:
                if not self.enough_replica_for_content(content):
                    if content not in self.replica_map:
                        continue
                    source = next(iter(self.replica_map[content]))
                    # select first non-zero replica
                    for server in self.server_set:
                        print('replicate ' + 'content: ' + content + ' from: ' + source + ' to ' + server)
                        util.replicate(content, source, server)

    def enough_replica(self):
        """True when every known content item has enough replicas."""
        # this is an approximate implementation, may need to
        # construct a bipartite graph and run min matching algo
        for c in self.content_set:
            if not self.enough_replica_for_content(c):
                return False
        return True

    def enough_replica_for_content(self, c):
        """True when the replicas of content c can serve the demand routed
        to each client's nearest server."""
        # this is an approximate implementation, may need to
        # construct a bipartite graph and run min matching algo
        server_to_request_sum_map = {}
        if c not in self.access_map:
            # no client accesses c
            return True
        for a in self.access_map[c]:
            nearest_server = util.find_closest_servers_with_ip(
                a, self.server_set)[0]['server']
            if nearest_server not in server_to_request_sum_map:
                server_to_request_sum_map[nearest_server] = 0
            server_to_request_sum_map[nearest_server] += self.access_map[c][a]
        for server, request_sum in server_to_request_sum_map.items():
            if (c not in self.replica_map) or (server
                                               not in self.replica_map[c]):
                return False
            if self.replica_map[c][
                    server] * self.requests_per_replica < request_sum:
                return False
        return True

    def replicate(self, content, source, dest):
        """Copy `content` from `source` to `dest`; when source == dest only
        the local replica count is bumped (no transfer needed)."""
        # BUG FIX: original used `print 'fmt', (args)` which printed the raw
        # format string followed by the tuple instead of interpolating.
        print('Greedy: replicate file %s from %s to %s' % (content, source,
                                                           dest))
        if source == dest:
            # BUG FIX: original did `self.replica_map[content] = 0`, which
            # clobbered the per-server dict and crashed on the next line.
            if dest not in self.replica_map[content]:
                self.replica_map[content][dest] = 0
            self.replica_map[content][dest] += 1
        else:
            util.replicate(content, source, dest)