def _Sync(self):
  """Forces the master to sync with the other data servers.

  Posts an empty body to /servers/sync-all on the master. When the master
  answers RESPONSE_OK, self.mapping is replaced with the mapping parsed from
  the response body.

  Returns:
    True if the sync succeeded and self.mapping was updated, False if the
    master could not be reached or answered with any other status.
  """
  pool = None
  try:
    pool = connectionpool.HTTPConnectionPool(self.addr, port=self.port)
    body = ""
    headers = {"Content-Length": len(body)}
    res = pool.urlopen(
        "POST", "/servers/sync-all", headers=headers, body=body)
    if res.status == constants.RESPONSE_INCOMPLETE_SYNC:
      print("Master has tried to contact all the data servers, but failed.")
      return False
    if res.status == constants.RESPONSE_DATA_SERVERS_UNREACHABLE:
      print("Master server says that some data servers are not running.")
      print("Giving up...")
      return False
    if res.status != constants.RESPONSE_OK:
      print("Unable to sync servers.")
      return False
    print("Sync done.")
    # Update mapping. The body is read before the pool is closed below.
    self.mapping = rdf_data_server.DataServerMapping(res.data)
    return True
  except urllib3.exceptions.MaxRetryError:
    print("Unable to contact master...")
    return False
  finally:
    # The pool is private to this call, so release its sockets on every path.
    if pool is not None:
      pool.close()
def _SendStatistics(self):
  """Posts the handler statistics to the master at /server/state.

  If the master reports that this server is not registered, Register() is
  called again. On RESPONSE_OK the mapping contained in the reply is saved
  through the handler's SERVICE.

  Returns:
    True if the statistics were accepted or a re-registration was triggered,
    False if the master rejected the request or could not be reached.
  """
  try:
    payload = self.handler_cls.GetStatistics().SerializeToString()
    response = self.pool.urlopen(
        "POST",
        "/server/state",
        headers={"Content-Length": len(payload)},
        body=payload)
    status = response.status
    if status == constants.RESPONSE_SERVER_NOT_REGISTERED:
      # The connection has probably been dropped, so register again.
      self.Register()
    elif status == constants.RESPONSE_OK:
      # The reply also carries the new mapping with fresh statistics.
      fresh_mapping = rdf_data_server.DataServerMapping(response.data)
      self.handler_cls.SERVICE.SaveServerMapping(fresh_mapping)
    else:
      logging.warning("Could not send statistics to data master.")
      return False
    return True
  except (urllib3.exceptions.MaxRetryError, errors.DataServerError):
    logging.warning("Could not send statistics to data master.")
    return False
def _ComputeMappingFromPercentages(self, mapping, newperc):
  """Builds a new mapping based on the new server range percentages.

  Consecutive intervals are laid out starting at 0. Each interval spans
  int(perc * constants.MAX_RANGE), except the last one, which always ends at
  constants.MAX_RANGE. Server identity (index, address, port, state) is copied
  from the server at the same position in mapping.servers.

  Args:
    mapping: Mapping whose servers are copied, in order, into the result.
    newperc: Sequence of range fractions, one per server.

  Returns:
    A DataServerMapping with version self.mapping.version + 1 and the same
    num_servers and pathing as self.mapping.
  """
  server_count = self.mapping.num_servers
  existing = list(mapping.servers)
  result = rdf_data_server.DataServerMapping(
      version=self.mapping.version + 1,
      num_servers=server_count,
      pathing=self.mapping.pathing)
  final_position = len(newperc) - 1
  cursor = 0
  for position, fraction in enumerate(newperc):
    span = int(fraction * constants.MAX_RANGE)
    segment = rdf_data_server.DataServerInterval(start=cursor)
    # The last interval absorbs any rounding slack up to MAX_RANGE.
    if position == final_position:
      boundary = constants.MAX_RANGE
    else:
      boundary = cursor + span
    segment.end = boundary
    source = existing[position]
    cursor = boundary
    result.servers.Append(
        index=source.index,
        address=source.address,
        port=source.port,
        state=source.state,
        interval=segment)
  return result
def _Recover(self, transid):
  """Completes a rebalancing transaction that was unsuccessful.

  Fetches the transaction from the master (/rebalance/recover), asks the
  operator for confirmation and re-submits it to /rebalance/commit. On success
  self.mapping is replaced with the mapping returned by the master. Progress
  and failures are reported on stdout; nothing is returned.

  Args:
    transid: Id of the transaction to recover; sent verbatim as the request
      body.
  """
  # Trailing comma: Python 2 print statement without a newline, so the
  # following message continues on the same line.
  print("Contacting master about transaction %s..." % transid),
  pool = None
  try:
    pool = urllib3.connectionpool.HTTPConnectionPool(self.addr, port=self.port)
  except urllib3.exceptions.MaxRetryError:
    print("Unable to contact master...")
    return
  print("OK.")

  try:
    try:
      body = transid
      headers = {"Content-Length": len(body)}
      res = pool.urlopen(
          "POST", "/rebalance/recover", headers=headers, body=body)
    except urllib3.exceptions.MaxRetryError:
      print("Unable to contact master...")
      return

    if res.status == constants.RESPONSE_TRANSACTION_NOT_FOUND:
      print("Transaction %s was not found" % transid)
      return
    if res.status != constants.RESPONSE_OK:
      print("Potential data master error. Giving up...")
      return
    rebalance = rdf_data_server.DataServerRebalance(res.data)
    print("Got transaction object %s" % rebalance.id)
    answer = raw_input("Proceed with the recover process? (y/n) ")
    if answer != "y":
      return

    body = rebalance.SerializeToString()
    headers = {"Content-Length": len(body)}
    try:
      res = pool.urlopen(
          "POST", "/rebalance/commit", headers=headers, body=body)
    except urllib3.exceptions.MaxRetryError:
      print("Could not commit re-sharding transaction with id %s" %
            rebalance.id)
      print("Make sure the data servers are up and then run:")
      print("'recover %s' in order to re-run transaction" % rebalance.id)
      return

    if res.status != constants.RESPONSE_OK:
      print("Could not commit transaction %s" % rebalance.id)
      print("Make sure the data servers are up and then run:")
      print("'recover %s' in order to re-run transaction" % rebalance.id)
      return

    self.mapping = rdf_data_server.DataServerMapping(res.data)
    print("Rebalance with id %s fully performed." % rebalance.id)
  finally:
    # The pool is private to this call, so release its sockets on every path.
    pool.close()
def HandleServerSync(self):
  """Accepts a mapping pushed by the master and replies with our statistics.

  When this process is itself the master, the request is refused with
  RESPONSE_IS_MASTER_SERVER. Otherwise the posted mapping is installed on
  DATA_SERVER and the serialized statistics are sent back.

  Returns:
    Whatever _EmptyResponse / _Response return for the chosen reply.
  """
  if self.MASTER:
    reply = self._EmptyResponse(constants.RESPONSE_IS_MASTER_SERVER)
  else:
    pushed_mapping = rdf_data_server.DataServerMapping(self.post_data)
    self.DATA_SERVER.SetMapping(pushed_mapping)
    # Report our current state back to the master.
    statistics = self.GetStatistics()
    reply = self._Response(constants.RESPONSE_OK,
                           statistics.SerializeToString())
  return reply
def __init__(self, myport, service):
  """Sets up the master state, registers itself and starts the stats thread.

  Args:
    myport: Port this process listens on. It must equal the port of the first
      entry in Dataserver.server_list.
    service: Object used to load and save the server mapping
      (LoadServerMapping / SaveServerMapping).

  Raises:
    DataMasterError: If Dataserver.server_list is empty, if a stored mapping
      has a different number of servers than the configuration, or if the
      first configured server does not use myport.
  """
  self.service = service
  stores = config_lib.CONFIG["Dataserver.server_list"]
  if not stores:
    logging.error(
        "Dataserver.server_list is empty: no data servers will"
        " be available")
    raise DataMasterError("Dataserver.server_list is empty")
  self.servers = [DataServer(loc, idx) for idx, loc in enumerate(stores)]
  self.registered_count = 0
  # Load server mapping.
  self.mapping = self.service.LoadServerMapping()
  if not self.mapping:
    # Bootstrap mapping.
    # Each server information is linked to its corresponding object.
    # Updating the data server object will reflect immediately on
    # the mapping.
    for server in self.servers:
      server.SetInitialInterval(len(self.servers))
    servers_info = [server.server_info for server in self.servers]
    self.mapping = rdf_data_server.DataServerMapping(
        version=0, num_servers=len(self.servers), servers=servers_info)
    self.service.SaveServerMapping(self.mapping, create_pathing=True)
  else:
    # Check mapping and configuration matching.
    if len(self.mapping.servers) != len(self.servers):
      raise DataMasterError("Server mapping does not correspond "
                            "to the configuration.")
    for server in self.servers:
      self._EnsureServerInMapping(server)
  # Create locks.
  self.server_lock = threading.Lock()
  # Register the master.
  self.myself = self.servers[0]
  if self.myself.Port() == myport:
    self._DoRegisterServer(self.myself)
  else:
    logging.warning(
        "First server in Dataserver.server_list is not the "
        "master. Found port '%i' but my port is '%i'. If you"
        " really are running master, you may want to specify"
        " flag --port %i.", self.myself.Port(), myport, myport)
    raise DataMasterError(
        "First server in Dataserver.server_list must be "
        "the master.")
  # Holds current rebalance operation. Set before the periodic thread is
  # started so that the thread never observes a partially initialised object.
  self.rebalance = None
  self.rebalance_pool = []
  # Start database measuring thread.
  sleep = config_lib.CONFIG["Dataserver.stats_frequency"]
  self.periodic_thread = utils.InterruptableThread(
      name="DataServer db measuring thread",
      target=self._PeriodicThread,
      sleep_time=sleep)
  self.periodic_thread.start()
def _PeriodicThread(self):
  """Refreshes self.mapping from the /manage endpoint.

  On RESPONSE_OK the mapping is replaced and self.mapping_time is set to the
  current time. A MaxRetryError from the request is ignored.

  Returns:
    False if the server answered with a status other than RESPONSE_OK,
    otherwise None.
  """
  payload = ""
  request_headers = {"Content-Length": len(payload)}
  try:
    response = self.pool.urlopen(
        "POST", "/manage", headers=request_headers, body=payload)
    if response.status == constants.RESPONSE_OK:
      self.mapping = rdf_data_server.DataServerMapping(response.data)
      self.mapping_time = time.time()
    else:
      return False
  except urllib3.exceptions.MaxRetryError:
    # Best effort: keep the previous mapping.
    pass
def RenewMapping(self):
  """Fetches the server mapping from the master and stores it.

  Returns:
    The DataServerMapping parsed from the master's reply, after it has been
    saved through the handler's SERVICE.

  Raises:
    errors.DataServerError: If the master answers with a status other than
      RESPONSE_OK or the request fails with MaxRetryError.
  """
  try:
    response = self.pool.urlopen("POST", "/server/mapping")
    if response.status == constants.RESPONSE_OK:
      fresh_mapping = rdf_data_server.DataServerMapping(response.data)
      self.handler_cls.SERVICE.SaveServerMapping(fresh_mapping)
      return fresh_mapping
    raise errors.DataServerError("Could not get server mapping from data "
                                 "master.")
  except urllib3.exceptions.MaxRetryError:
    raise errors.DataServerError("Error when attempting to communicate with"
                                 " data master.")
def LoadServerMapping(self):
  """Reads the stored server mapping from the database.

  If the pathing recorded in the mapping differs from the current one
  (according to _DifferentPathing), self.pathing is replaced and the database
  pathing is recreated.

  Returns:
    The stored DataServerMapping, or None if nothing is stored.
  """
  acl_token = access_control.ACLToken(username="******").SetUID()
  serialized, _ = self.db.Resolve(
      MAP_SUBJECT, MAP_VALUE_PREDICATE, token=acl_token)
  if serialized:
    stored = rdf_data_server.DataServerMapping(serialized)
    # Restore pathing information.
    if self._DifferentPathing(list(stored.pathing)):
      self.pathing = list(stored.pathing)
      self.db.RecreatePathing(self.pathing)
    return stored
  return None
def LoadMapping(self):
  """Polls the data server until it returns a mapping.

  Between attempts the call sleeps for HTTPDataStore.retry_time. It gives up
  once more than HTTPDataStore.reconnect_timeout has elapsed since the first
  attempt.

  Returns:
    The DataServerMapping parsed from the fetched data.

  Raises:
    HTTPDataStoreError: If no mapping could be fetched within the timeout.
  """
  began = time.time()
  payload = self._FetchMapping()
  while not payload:
    elapsed = time.time() - began
    if elapsed > config_lib.CONFIG["HTTPDataStore.reconnect_timeout"]:
      raise HTTPDataStoreError("Could not get server mapping from data "
                               "server at %s:%d." %
                               (self.Address(), self.Port()))
    time.sleep(config_lib.CONFIG["HTTPDataStore.retry_time"])
    payload = self._FetchMapping()
  return rdf_data_server.DataServerMapping(payload)
def HandleRebalancePhase1(self):
  """Asks the master to perform phase 1 of the rebalancing operation.

  The posted mapping is wrapped in a new DataServerRebalance with a fresh
  UUID. If the master accepts it and collects the rebalance information, the
  id is remembered in self.rebalance_id and the serialized transaction is sent
  back. Every failure is answered with an empty response carrying the
  matching status code.
  """
  if not self.MASTER:
    self._EmptyResponse(constants.RESPONSE_NOT_MASTER_SERVER)
  elif self.MASTER.IsRebalancing():
    self._EmptyResponse(constants.RESPONSE_MASTER_IS_REBALANCING)
  else:
    proposed_mapping = rdf_data_server.DataServerMapping(self.post_data)
    transaction_id = str(uuid.uuid4())
    transaction = rdf_data_server.DataServerRebalance(
        id=transaction_id, mapping=proposed_mapping)
    if not self.MASTER.SetRebalancing(transaction):
      logging.warning("Could not contact servers for rebalancing")
      self._EmptyResponse(constants.RESPONSE_DATA_SERVERS_UNREACHABLE)
    elif not self.MASTER.FetchRebalanceInformation():
      logging.warning("Could not contact servers for rebalancing statistics")
      self._EmptyResponse(constants.RESPONSE_DATA_SERVERS_UNREACHABLE)
    else:
      self.rebalance_id = transaction_id
      self._Response(constants.RESPONSE_OK, transaction.SerializeToString())
def _RemServer(self, addr, port):
  """Remove server from group.

  The server must be found by _FindServer and have an empty interval
  (start == end). The master is first asked to validate the removal
  (/servers/rem/check), then the operator is asked to confirm, and only then
  is the removal requested (/servers/rem). On RESPONSE_OK self.mapping is
  replaced with the mapping returned by the master. If the master reports an
  incomplete sync, one additional _Sync() is attempted. Progress and failures
  are reported on stdout; nothing is returned.

  Args:
    addr: Address of the data server to remove.
    port: Port of the data server to remove (formatted with %d).
  """
  # Find server.
  server, _ = self._FindServer(addr, port)
  if not server:
    print("Server not found.")
    return
  if server.interval.start != server.interval.end:
    print("Server has some data in it!")
    print("Giving up...")
    return
  pool = None
  try:
    pool = connectionpool.HTTPConnectionPool(self.addr, port=self.port)
  except urllib3.exceptions.MaxRetryError:
    print("Unable to contact master...")
    return

  try:
    body = self._PackNewServer(addr, port)
    headers = {"Content-Length": len(body)}
    try:
      res = pool.urlopen(
          "POST", "/servers/rem/check", headers=headers, body=body)
    except urllib3.exceptions.MaxRetryError:
      print("Unable to contact master...")
      return

    if res.status == constants.RESPONSE_DATA_SERVER_NOT_FOUND:
      print("Master server says the data server does not exist.")
      return
    if res.status == constants.RESPONSE_RANGE_NOT_EMPTY:
      print("Master server says the data server has still some data.")
      print("Giving up...")
      return
    if res.status == constants.RESPONSE_DATA_SERVERS_UNREACHABLE:
      print("Master server says some data servers are not running.")
      print("Giving up...")
      return
    if res.status != constants.RESPONSE_OK:
      print("Master server error. Is the server running?")
      return

    print("Master server allows us to remove server %s:%d" % (addr, port))
    answer = raw_input("Do you really want to remove server //%s:%d? (y/n) " %
                       (addr, port))
    if answer != "y":
      return

    try:
      res = pool.urlopen("POST", "/servers/rem", headers=headers, body=body)
    except urllib3.exceptions.MaxRetryError:
      print("Unable to contact master...")
      return

    if res.status == constants.RESPONSE_DATA_SERVERS_UNREACHABLE:
      print("Master server says that some data servers are not running.")
      print("Giving up...")
      return

    if res.status == constants.RESPONSE_OK:
      # Update mapping.
      self.mapping = rdf_data_server.DataServerMapping(res.data)
      self._CompleteRemServerHelpComplete(addr, port)
      return

    if res.status == constants.RESPONSE_INCOMPLETE_SYNC:
      # We were unable to sync, so we try again:
      if self._Sync():
        self._CompleteRemServerHelpComplete(addr, port)
        return
      # If we cannot sync in the second attempt, we give up.
      print("The master server has removed the new server, but the other "
            "servers may not know about it.")
      print("Please run 'sync' to fix the problem, followed by:")
      self._CompleteRemServerHelp(addr, port)
      return

    # Every recognised status has been handled above.
    print("Master has returned an unknown error...")
  finally:
    # The pool is private to this call, so release its sockets on every path.
    pool.close()
def _AddServer(self, addr, port):
  """Starts the process of adding a new server.

  The master is first asked to validate the addition (/servers/add/check),
  then the operator is asked to confirm, and only then is the addition
  requested (/servers/add). On RESPONSE_OK self.mapping is replaced with the
  mapping returned by the master. Progress and failures are reported on
  stdout; nothing is returned.

  Args:
    addr: Address of the data server to add.
    port: Port of the data server to add; must be greater than zero.
  """
  if port <= 0:
    print("Wrong port: %d" % port)
    return
  pool = None
  try:
    pool = connectionpool.HTTPConnectionPool(self.addr, port=self.port)
  except urllib3.exceptions.MaxRetryError:
    print("Unable to contact master...")
    return

  try:
    body = self._PackNewServer(addr, port)
    headers = {"Content-Length": len(body)}
    try:
      res = pool.urlopen(
          "POST", "/servers/add/check", headers=headers, body=body)
    except urllib3.exceptions.MaxRetryError:
      print("Unable to contact master...")
      return

    if res.status == constants.RESPONSE_EQUAL_DATA_SERVER:
      print("Master server says there is already a similar server.")
      print("Giving up...")
      return
    if res.status == constants.RESPONSE_DATA_SERVERS_UNREACHABLE:
      print("Master server says that some data servers are not running.")
      print("Giving up...")
      return
    if res.status != constants.RESPONSE_OK:
      print("Master server error. Is the server running?")
      return

    print("Master server allows us to add server %s:%d" % (addr, port))
    answer = raw_input("Do you really want to add server //%s:%d? (y/n) " %
                       (addr, port))
    if answer != "y":
      return

    try:
      res = pool.urlopen("POST", "/servers/add", headers=headers, body=body)
    except urllib3.exceptions.MaxRetryError:
      print("Unable to contact master...")
      return

    if res.status == constants.RESPONSE_DATA_SERVERS_UNREACHABLE:
      print("Master server says that some data servers are not running.")
      print("Giving up...")
      return
    if res.status == constants.RESPONSE_INCOMPLETE_SYNC:
      print("The master server has set up the new server, but the other "
            "servers may not know about it.")
      print("Please run 'sync' to fix the problem.")
      print("Afterwards, you have to rebalance server data with the "
            "following:")
      self._CompleteAddServerHelp(addr, port)
      return
    if res.status != constants.RESPONSE_OK:
      print("Failed to contact master server.")
      return

    print("=============================================")
    print("Operation completed.")
    print("To rebalance server data you have to do the following:")
    self._CompleteAddServerHelp(addr, port)
    # Update mapping.
    self.mapping = rdf_data_server.DataServerMapping(res.data)
  finally:
    # The pool is private to this call, so release its sockets on every path.
    pool.close()
def _DoRebalance(self, new_mapping):
  """Performs a new rebalancing operation with the master server.

  Sends new_mapping to /rebalance/phase1, shows how much data each server
  would move and asks the operator for confirmation. It then runs
  /rebalance/phase2 and /rebalance/commit with the transaction returned by
  phase 1. On success self.mapping is replaced with the mapping returned by
  the commit. Progress and failures are reported on stdout; nothing is
  returned.

  Args:
    new_mapping: The desired mapping; its SerializeToString() output is used
      as the phase 1 request body.
  """
  # Trailing comma: Python 2 print statement without a newline, so the
  # following message continues on the same line.
  print("Contacting master server to start re-sharding..."),
  # Send mapping information to master.
  pool = None
  try:
    pool = connectionpool.HTTPConnectionPool(self.addr, port=self.port)
  except urllib3.exceptions.MaxRetryError:
    print("Unable to contact master...")
    return

  try:
    body = new_mapping.SerializeToString()
    headers = {"Content-Length": len(body)}
    res = None
    try:
      res = pool.urlopen(
          "POST", "/rebalance/phase1", headers=headers, body=body)
    except urllib3.exceptions.MaxRetryError:
      print("Unable to talk with master...")
      return
    if res.status != constants.RESPONSE_OK:
      print("Re-sharding cannot be done!")
      return

    rebalance = rdf_data_server.DataServerRebalance(res.data)
    print("OK")
    print("")
    print("The following servers will need to move data:")
    for i, move in enumerate(list(rebalance.moving)):
      print("Server %d moves %dKB" % (i, move / 1024))
    answer = raw_input("Proceed with re-sharding? (y/n) ")
    if answer != "y":
      return

    # Phase 2 and the commit both carry the transaction from phase 1.
    body = rebalance.SerializeToString()
    headers = {"Content-Length": len(body)}
    try:
      res = pool.urlopen(
          "POST", "/rebalance/phase2", headers=headers, body=body)
    except urllib3.exceptions.MaxRetryError:
      print("Unable to contact server for re-sharding.")
      print("Make sure the data servers are up and try again.")
      return
    if res.status != constants.RESPONSE_OK:
      print("Could not start copying files for re-sharding")
      print("Make sure the data servers are up and try again.")
      return

    try:
      res = pool.urlopen(
          "POST", "/rebalance/commit", headers=headers, body=body)
    except urllib3.exceptions.MaxRetryError:
      print("Could not commit the re-sharding transaction with id %s" %
            rebalance.id)
      print("Make sure the data servers are up and then run:")
      print("'recover %s' in order to re-run transaction" % rebalance.id)
      return
    if res.status != constants.RESPONSE_OK:
      print("Could not commit the transaction %s" % rebalance.id)
      print("Make sure the data servers are up and then run:")
      print("'recover %s' in order to re-run transaction" % rebalance.id)
      return

    self.mapping = rdf_data_server.DataServerMapping(res.data)
    print("Rebalance with id %s fully performed." % rebalance.id)
  finally:
    # Previously the pool was closed on a single error path only; release its
    # sockets on every path instead.
    pool.close()