def delete(self, path):
    """Ask the master server to delete *path* and log the outcome."""
    proxy = rpc_call(self.master_addr)
    failure = proxy.delete(path)
    if not failure:
        log.info("File Deleted Successfully")
    else:
        log.error("Error while deleting %s : %s", path, failure)
def create(self, path):
    """Ask the master server to create a new file at *path*; log the result."""
    proxy = rpc_call(self.master_addr)
    resp, err = proxy.create(path)
    if not resp:
        log.error("Error creating file '%s'. Why? : %s", path, err)
    else:
        log.debug("Create API response %s", resp)
def write_helper(self, path, chunk_index, start, end, data):
    """Push *data* to every replica of the chunk, then ask the primary
    to commit the write at offset *start*.

    Returns True on success, False on any failure along the way.
    """
    handle, locations, err = self.get_chunk_guaranteed(path, chunk_index)
    if err:
        return False
    ident = DataId(self.client_id, time.time())
    # Stage the data in every replica's memory before committing.
    if self.push_data(locations, ident, data):
        log.error('Data not pushed to all replicas.')
        return False
    # The lease holder (primary chunk server) serializes the write order.
    lease_holder = self.find_lease_holder(handle)
    if not lease_holder:
        log.error("Primary chunk server not found.")
        return False
    write_err = rpc_call(lease_holder).write(
        ident.client_id, ident.timestamp, path, chunk_index,
        handle, start, locations)
    return not write_err
def getfilelength(self, path):
    """Ask the master server for the total length of *path*.

    Returns the (length, error) pair from the master's get_file_length RPC.
    """
    proxy = rpc_call(self.master_addr)
    length, err = proxy.get_file_length(path)
    log.debug("%s length is: %s", path, length)
    return length, err
def push_data(self, chunk_locations, data_id, data):
    """Send *data* (tagged by *data_id*) to every server in *chunk_locations*.

    Stops at the first replica that reports a problem and returns that
    error; returns None when all replicas accepted the data.
    """
    for address in chunk_locations:
        failure = rpc_call(address).push_data(
            data_id.client_id, data_id.timestamp, data)
        if failure:
            return failure
    return None
def list_allfiles(self, path):
    """Ask the master for the list of files under *path* and log them.

    On success each file name is logged at debug level; on failure the
    master's error is logged.
    """
    master_server = rpc_call(self.master_addr)
    resp, err = master_server.list_allfiles(path)
    if resp:
        log.debug("List of files in %s:\n", path)
        for filename in resp:  # renamed: 'file' shadowed the builtin
            log.debug("%s\n", filename)
    else:
        # BUG FIX: message previously said "Error creating file"
        # (copy-pasted from create()); this RPC lists files.
        log.error("Error listing files in '%s'. Why? : %s", path, err)
def __init__(self, master_addr):
    """Create a client bound to the master server at *master_addr*."""
    self.master_addr = master_addr
    # Ask the master for an id that uniquely identifies this client.
    self.client_id = rpc_call(self.master_addr).unique_client_id()
    # TODO: implement cache with timeout. need some kind of expiring dict
    self.location_cache = {}
    # TODO: implement cache with timeout
    self.lease_holder_cache = {}
def apply_to_secondary(self, client_id, timestamp, path, chunk_index,
                       chunk_handle, offset, chunk_locations):
    """RPC each secondary chunkserver to apply a serialized write.

    Skips this server's own address. Returns the first error reported
    by a secondary, or None when every secondary succeeded.
    """
    secondaries = (addr for addr in chunk_locations if addr != self.my_addr)
    for address in secondaries:
        failure = rpc_call(address).serialized_write(
            client_id, timestamp, path, chunk_index, chunk_handle,
            offset, chunk_locations, False)
        if failure:
            return failure
    return None
def test_connection(self, chunk_server_addr):
    """Probe *chunk_server_addr* by asking it to drop its bad chunks.

    Returns True when the RPC goes through — whether or not the server
    managed to delete every bad chunk — and False when the connection
    is refused (the server is considered dead).
    """
    proxy = rpc_call(chunk_server_addr)
    try:
        # try to connect with chunkserver
        deleted_all = proxy.delete_bad_chunk(self.chunks_to_delete)
    except ConnectionRefusedError:
        log.info("Unable to connect with %s", chunk_server_addr)
        return False
    if deleted_all:
        log.info("%s has deleted all bad chunks", chunk_server_addr)
    else:
        log.info("%s is unable to delete all bad chunk handle",
                 chunk_server_addr)
    return True
def read_helper(self, path, chunk_index, start, length):
    """Call Chunkserver RPC to read chunkdata"""
    chunk_handle, chunk_locations, err = self.find_chunk(path, chunk_index)
    if err:
        return None, err
    # Pick one replica at random; -1 turns the 1-based draw into an index.
    replica_count = min(len(chunk_locations), REPLICATION_FACTOR)
    chosen = chunk_locations[random.randint(1, replica_count) - 1]
    log.debug("Chunk Handle %s and chunk Locations %s ",
              chunk_handle, chunk_locations)
    # TODO :Handle case if server is down
    return rpc_call(chosen).read(chunk_handle, start, length)
def poll_chunkservers(self):
    """A one time polling function, runs when master is started to get
    list of chunks from active chunk servers and update the
    chunks_of_chunkserver dict."""
    log.debug("****Polling active chunkservers start***")
    for address in self.active_chunk_servers:
        log.debug("Polling chunkserver %s", address)
        proxy = rpc_call(address)
        try:
            handles = proxy.get_chunk_handles()
        except ConnectionRefusedError:
            log.error("Polling failed for chunkserver: %s", address)
        else:
            # Record this server's chunk inventory.
            self.chunks_of_chunk_server[address] = handles
            log.debug("Polling complete for chunkserver: %s", address)
    log.debug("****Polling active chunkservers end***")
def append(self, path, data):
    """Append *data* to the end of the file at *path*.

    Returns the offset at which the primary wrote the record, or an
    error string describing why the append could not be performed.
    """
    length = len(data)
    # First check if the size is valid.
    if length > APPEND_SIZE:
        log.error("ERROR: Data size exceeds append limit.")
        return "size limit exceeded"
    # To calculate chunkIndex we must get the length.
    filelength, err = self.getfilelength(path)
    if err:
        # BUG FIX: previously execution fell through here with an
        # invalid filelength (likely None) and crashed on the
        # division below. Bail out with an error string instead.
        log.error("Error while fetching file length %s", err)
        return "can't get file length"
    log.debug("File length fetched from server %s", filelength)
    chunk_index = filelength // CHUNK_SIZE
    # Get chunkHandle and chunkLocations
    chunk_handle, chunk_locations, err = self.get_chunk_guaranteed(
        path, chunk_index)
    # Was a bare print(); use the module logger like the rest of the file.
    log.debug("APPEND :: %s %s %s", chunk_handle, chunk_locations, err)
    if err:
        return "can't get chunk handle location"
    # Construct dataId with clientId and current timestamp.
    data_id = DataId(self.client_id, time.time())
    # Push data to all replicas' memory.
    err = self.push_data(chunk_locations, data_id, data)
    if err:
        log.error('Data not pushed to all replicas.')
        return "Data not pushed to all replicas."
    # Once data is pushed to all replicas, send the append request to
    # the primary (the current lease holder).
    primary = self.find_lease_holder(chunk_handle)
    if not primary:
        log.error("Primary chunk server not found.")
        return "Primary chunk server not found."
    # Make Append call to primary chunk server
    primary_cs = rpc_call(primary)
    offset = primary_cs.append(data_id.client_id, data_id.timestamp,
                               chunk_handle, chunk_index, path,
                               chunk_locations)
    log.debug("append offset = %s", offset)
    return offset
def find_lease_holder(self, chunk_handle):
    """Return the address of the primary for *chunk_handle*, or None.

    Answers from the local lease-holder cache when possible; otherwise
    asks the master and caches a successful reply.
    """
    key = f'{chunk_handle}'
    cached = self.lease_holder_cache.get(key)
    if cached:
        return cached['primary']
    # If not found in cache, RPC the master server.
    primary, lease_ends, err = rpc_call(
        self.master_addr).find_lease_holder(chunk_handle)
    if err:
        return None
    self.lease_holder_cache[key] = {
        'primary': primary,
        'lease_ends': lease_ends,
    }
    return primary
def find_chunk(self, path, chunk_index):
    """Resolve (chunk_handle, chunk_locations, err) for a file chunk.

    Answers from the location cache when possible; otherwise asks the
    master server and caches the successful reply.
    """
    key = f'{path}:{chunk_index}'
    cached = self.location_cache.get(key, None)
    if cached:
        # cached value found
        return cached.chunk_handle, cached.chunk_locations, None
    # Not cached; note the master replies (locations, handle, err).
    locations, handle, err = rpc_call(self.master_addr).find_locations(
        path, chunk_index)
    if err:
        return None, None, err
    # Save into location cache
    self.location_cache[key] = ChunkInfo(handle, locations)
    return handle, locations, err
def order_chunk_copy_from_peer(self, peer_address, chunk_handle):
    """This RPC is called by master to order a chunkserver to copy some
    chunks from a peer chunk server so as to meet the replication goal
    for that chunk."""
    peer = rpc_call(peer_address)
    # Fetch the chunk's metadata, then its actual bytes, from the peer.
    chunk_index, path, length = peer.get_chunk_info_from_peer(chunk_handle)
    payload, err = peer.read(chunk_handle, 0, length)
    if err:
        log.error(err)
        return err
    # Persist the chunk locally, using its handle as the filename.
    write_err = self.apply_write(f"{chunk_handle}", payload.data, 0)
    if write_err:
        return write_err
    self.report_chunk_info(chunk_handle, chunk_index, path, length, 0)
def start_chunkserver(master_addr, my_ip, my_port, path):
    """Boot a chunkserver, register it with the master, and serve RPCs
    forever over XML-RPC."""
    ensure_dir(path)  # make sure this path exists
    cs = ChunkServer(f'http://{my_ip}:{my_port}', master_addr, path,
                     f'logs/ck_{my_port}.txt')
    # Load metadata
    load_metadata(cs)
    # Announce this chunk server and its chunk inventory to the master.
    # This must happen only after the oplog has been replayed.
    rpc_call(cs.master_addr).notify_master(cs.my_addr,
                                           list(cs.chunks.keys()))
    server = SimpleXMLRPCServer((my_ip, my_port),
                                logRequests=True,
                                allow_none=True)
    server.register_introspection_functions()
    server.register_instance(cs)
    server.serve_forever()
def add_chunk(self, path, chunk_index):
    """Ask the master to allocate a chunk for (path, chunk_index).

    Returns the master's (chunk_handle, chunk_locations, err) triple.
    """
    master = rpc_call(self.master_addr)
    return master.add_chunk(path, chunk_index)
def beat(self):
    """Master heartbeat loop: every HEARTBEAT_INTERVAL seconds, detect
    dead chunk servers and re-replicate the chunks they were holding.

    For each chunk on a dead server, a random live destination server
    (not already holding the chunk) is ordered to copy the chunk from a
    random surviving peer replica. Runs forever.
    """
    # FIXME: Simplify
    while True:
        time.sleep(HEARTBEAT_INTERVAL)
        log.debug("Heart Beating %s", self.locations)
        log.debug("Heart Beating %s", self.active_chunk_servers)
        # Build the list of dead chunk servers by testing a connection
        # to each currently-active one.
        dead_chunk_servers = [cs for cs in self.active_chunk_servers
                              if not self.test_connection(cs)]
        log.debug("Dead chunk servers list = %s", dead_chunk_servers)
        # Delete dead chunk servers from the active chunk servers set.
        self.active_chunk_servers.difference_update(dead_chunk_servers)
        # Loop over all chunk handles of each dead chunk server.
        for dead_chunk_server in dead_chunk_servers:
            # Get the list of chunks that need to be replicated.
            chunk_handles = self.chunks_of_chunk_server.get(
                dead_chunk_server, [])
            for chunk_handle in chunk_handles:
                chunk_info = self.locations.get(chunk_handle, None)
                if chunk_info and dead_chunk_server in chunk_info.chunk_locations:
                    # Remove the dead chunkserver from this chunk's
                    # replica location list.
                    chunk_info.chunk_locations.remove(dead_chunk_server)
                    dest_cs = None
                    # If replication is needed and we have enough
                    # active chunkservers, then perform replication.
                    if REPLICATION_FACTOR - len(chunk_info.chunk_locations) > 0 \
                            and len(self.active_chunk_servers) >= REPLICATION_FACTOR:
                        # TODO: Probably handle with semaphore
                        while True:
                            # Keep looping until we pick a chunk server
                            # which does not already contain this chunk.
                            # TODO: don't run infinitely, set a fixed
                            # max number of times this is executed.
                            rand_loc = pick_randomly(
                                self.active_chunk_servers, 1)[0]
                            if rand_loc not in chunk_info.chunk_locations:
                                dest_cs = rand_loc
                                break
                    if not dest_cs:
                        # No valid destination chunkserver found; skip
                        # this chunk's replication.
                        continue
                    # Else perform replication: order the destination
                    # to copy the chunk from a random surviving peer.
                    peer_address = pick_randomly(
                        chunk_info.chunk_locations, 1)[0]
                    cs_proxy = rpc_call(dest_cs)
                    try:
                        err = cs_proxy.order_chunk_copy_from_peer(
                            peer_address, chunk_handle)
                        if err:
                            log.info("Unable to replicate to %s due to %s",
                                     dest_cs, err)
                    except ConnectionRefusedError:
                        log.info("Unable to connect to %s for %s replication",
                                 dest_cs, chunk_handle)
            # Delete the dead_chunk_server entry from chunks_of_chunk_server.
            # TODO: donot remove if replication was not performed
            self.chunks_of_chunk_server.pop(dead_chunk_server, None)
def report_chunk(cs, chunk_info):
    """Report one chunk's metadata from chunkserver *cs* to the master."""
    master = rpc_call(cs.master_addr)
    # TODO: receive returned error if any
    master.report_chunk(cs.my_addr,
                        chunk_info.chunk_handle,
                        chunk_info.chunk_index,
                        chunk_info.length,
                        chunk_info.path)