def get_read_table(tables):
    """Pick one entry of ``tables`` at random, biased toward less-loaded hosts.

    Each table is keyed by ``t[0].bind.url.host``; when several tables share
    a host only the last one seen is kept for that host.  Load figures come
    from ``AppServiceMonitor.get_db_load``.

    Args:
        tables: sequence of tables; each must be indexable and its first
            element must expose ``.bind.url.host``.

    Returns:
        ``tables[0]`` when there is exactly one table, otherwise the table
        mapped to the randomly selected host.
    """
    # shortcut with 1 entry
    if len(tables) == 1:
        return tables[0]

    # 't' is a list of engines itself. since we assume those engines
    # are on the same machine, just take the first one. len(ips) may be
    # < len(tables) if some tables are on the same host.
    ips = dict((t[0].bind.url.host, t) for t in tables)
    ip_loads = AppServiceMonitor.get_db_load(ips.keys())

    total_load = 0
    missing_loads = []
    no_connections = []
    have_loads = []
    for ip in ips:
        if ip not in ip_loads:
            missing_loads.append(ip)
            continue
        load, avg_load, conns, avg_conns, max_conns = ip_loads[ip]
        # prune high-connection machines
        if conns < .9 * max_conns:
            max_load = max(load, avg_load)
            total_load += max_load
            have_loads.append((ip, max_load))
        else:
            no_connections.append(ip)

    if total_load and len(have_loads) > 1:
        # A host's weight is the share of the total load carried by the
        # *other* hosts, so busier hosts receive less traffic.  float()
        # keeps the division exact when the loads are integers.
        ip_weights = [(ip, 1 - load / float(total_load))
                      for ip, load in have_loads]
    else:
        # Nothing is loaded, or only one host reported: the formula above
        # would give a lone host weight 0, so split evenly instead.  With
        # an empty have_loads the comprehension never divides.
        ip_weights = [(ip, 1.0 / len(have_loads)) for ip, load in have_loads]

    # Hosts with no load data get the mean weight of the measured hosts
    # (a weight, not a raw load figure, so the scales match).
    if ip_weights:
        avg_weight = sum(w[1] for w in ip_weights) / len(ip_weights)
    else:
        avg_weight = 1.0
    ip_weights.extend((ip, avg_weight) for ip in missing_loads)
    # add in the over-connected machines with a 1% weight
    ip_weights.extend((ip, .01) for ip in no_connections)

    # Always rebalance: the cumulative walk below only matches the intended
    # distribution when the weights sum to 1.  "or 1" guards a zero total.
    total_weight = sum(w[1] for w in ip_weights) or 1
    ip_weights = [(ip, weight / total_weight) for ip, weight in ip_weights]

    r = random.random()
    for ip, weight in ip_weights:
        if r < weight:
            return ips[ip]
        r -= weight

    # Only reachable through float rounding or when no host was weighted.
    # The parenthesised form prints the same text on Python 2 and 3.
    print('yer stupid')
    return random.choice(tables)
def get_read_table(tables):
    """Pick one entry of ``tables`` at random, biased toward less-loaded hosts.

    Each table is keyed by ``t[0].bind.url.host``; when several tables share
    a host only the last one seen is kept for that host.  Load figures come
    from ``AppServiceMonitor.get_db_load``.

    Args:
        tables: sequence of tables; each must be indexable and its first
            element must expose ``.bind.url.host``.

    Returns:
        ``tables[0]`` when there is exactly one table, otherwise the table
        mapped to the randomly selected host.
    """
    # short-cut for only one element
    if len(tables) == 1:
        return tables[0]

    # 't' is a list of engines itself. since we assume those engines
    # are on the same machine, just take the first one. len(ips) may be
    # < len(tables) if some tables are on the same host.
    ips = dict((t[0].bind.url.host, t) for t in tables)
    ip_loads = AppServiceMonitor.get_db_load(ips.keys())

    total_load = 0
    missing_loads = []
    have_loads = []
    for ip in ips:
        if ip not in ip_loads:
            missing_loads.append(ip)
            continue
        # Connection-count pruning (conns >= 90% of max_conns) is disabled
        # in this version, so every host with load data stays eligible and
        # the connection figures are unpacked but unused.
        load, avg_load, conns, avg_conns, max_conns = ip_loads[ip]
        max_load = max(load, avg_load)
        total_load += max_load
        have_loads.append((ip, max_load))

    if total_load and len(have_loads) > 1:
        # A host's weight is the share of the total load carried by the
        # *other* hosts, so busier hosts receive less traffic.  float()
        # keeps the division exact when the loads are integers.
        ip_weights = [(ip, 1 - load / float(total_load))
                      for ip, load in have_loads]
    else:
        # Nothing is loaded, or only one host reported: the formula above
        # would give a lone host weight 0, so split evenly instead.  With
        # an empty have_loads the comprehension never divides.
        ip_weights = [(ip, 1.0 / len(have_loads)) for ip, load in have_loads]

    # Hosts with no load data get the mean weight of the measured hosts
    # (a weight, not a raw load figure, so the scales match).
    if ip_weights:
        avg_weight = sum(w[1] for w in ip_weights) / len(ip_weights)
    else:
        avg_weight = 1.0
    ip_weights.extend((ip, avg_weight) for ip in missing_loads)

    # Always rebalance: the cumulative walk below only matches the intended
    # distribution when the weights sum to 1.  "or 1" guards a zero total.
    total_weight = sum(w[1] for w in ip_weights) or 1
    ip_weights = [(ip, weight / total_weight) for ip, weight in ip_weights]

    r = random.random()
    for ip, weight in ip_weights:
        if r < weight:
            return ips[ip]
        r -= weight

    # Only reachable through float rounding or when no host was weighted.
    # The parenthesised form prints the same text on Python 2 and 3.
    print('yer stupid')
    return random.choice(tables)
def get_read_table(self, tables):
    """Pick one entry of ``tables`` at random, biased toward less-loaded hosts.

    Tables whose first engine is listed in ``self.dead`` are skipped, except
    that each dead table has a 1-in-100 chance per call of being re-tested
    with ``self.test_engine`` and readmitted if the test succeeds.  The
    remaining tables are keyed by ``t[0].bind.url.host`` (the last table
    seen wins when several share a host) and weighted using the figures
    returned by ``AppServiceMonitor.get_db_load``.

    Args:
        tables: sequence of hashable tables; each must be indexable and its
            first element must expose ``.bind`` with a ``.url.host``.

    Returns:
        ``tables[0]`` when there is exactly one table, otherwise the table
        mapped to the randomly selected host.
    """
    # short-cut for only one element
    if len(tables) == 1:
        return tables[0]

    if self.dead:
        tables = set(tables)
        dead = set(t for t in tables if t[0].bind in self.dead)
        for t in list(dead):
            # TODO: tune the reconnect code.  We have about 1-2
            # requests per second per app, so this should
            # reconnect every 50-100 seconds.
            if (random.randint(1, 100) == 42
                    and self.test_engine(t[0].bind)):
                dead.remove(t)
        tables = tables - dead

    # Imported here rather than at the top of the function so the
    # single-table fast path above never touches the import machinery.
    from r2.lib.services import AppServiceMonitor

    # 't' is a list of engines itself. since we assume those engines
    # are on the same machine, just take the first one. len(ips) may be
    # < len(tables) if some tables are on the same host.
    ips = dict((t[0].bind.url.host, t) for t in tables)
    ip_loads = AppServiceMonitor.get_db_load(ips.keys())

    total_load = 0
    missing_loads = []
    no_connections = []
    have_loads = []
    for ip in ips:
        if ip not in ip_loads:
            missing_loads.append(ip)
            continue
        load, avg_load, conns, avg_conns, max_conns = ip_loads[ip]
        # remove high load machines from the pool.
        if load < 100:
            max_load = max(load, avg_load)
            total_load += max_load
            have_loads.append((ip, max_load))
        else:
            no_connections.append(ip)

    if total_load and len(have_loads) > 1:
        # A host's weight is the share of the total load carried by the
        # *other* hosts, so busier hosts receive less traffic.  float()
        # keeps the division exact when the loads are integers.
        ip_weights = [(ip, 1 - load / float(total_load))
                      for ip, load in have_loads]
    else:
        # Nothing is loaded, or only one host reported: the formula above
        # would give a lone host weight 0, so split evenly instead.  With
        # an empty have_loads the comprehension never divides.
        ip_weights = [(ip, 1.0 / len(have_loads)) for ip, load in have_loads]

    # Hosts with no load data get the mean weight of the measured hosts
    # (a weight, not a raw load figure, so the scales match).
    if ip_weights:
        avg_weight = sum(w[1] for w in ip_weights) / len(ip_weights)
    else:
        avg_weight = 1.0
    ip_weights.extend((ip, avg_weight) for ip in missing_loads)
    # add in the over-loaded machines with a 1% weight
    ip_weights.extend((ip, .01) for ip in no_connections)

    # Always rebalance: the cumulative walk below only matches the intended
    # distribution when the weights sum to 1.  "or 1" guards a zero total.
    total_weight = sum(w[1] for w in ip_weights) or 1
    ip_weights = [(ip, weight / total_weight) for ip, weight in ip_weights]

    r = random.random()
    for ip, weight in ip_weights:
        if r < weight:
            return ips[ip]
        r -= weight

    # Only reachable through float rounding or when no host was weighted.
    # NOTE(review): if every table was pruned as dead, ``tables`` is empty
    # here and random.choice raises IndexError -- confirm callers cope.
    # The parenthesised form prints the same text on Python 2 and 3.
    print('yer stupid')
    return random.choice(list(tables))
def get_read_table(self, tables):
    """Pick one entry of ``tables`` at random, biased toward less-loaded hosts.

    Tables whose first engine is listed in ``self.dead`` are skipped, except
    that each dead table has a 1-in-100 chance per call of being re-tested
    with ``self.test_engine`` and readmitted if the test succeeds.  The
    remaining tables are keyed by ``t[0].bind.url.host`` (the last table
    seen wins when several share a host) and weighted using the figures
    returned by ``AppServiceMonitor.get_db_load``.

    Args:
        tables: sequence of hashable tables; each must be indexable and its
            first element must expose ``.bind`` with a ``.url.host``.

    Returns:
        ``tables[0]`` when there is exactly one table, otherwise the table
        mapped to the randomly selected host.
    """
    # short-cut for only one element
    if len(tables) == 1:
        return tables[0]

    if self.dead:
        tables = set(tables)
        dead = set(t for t in tables if t[0].bind in self.dead)
        for t in list(dead):
            # TODO: tune the reconnect code.  We have about 1-2
            # requests per second per app, so this should
            # reconnect every 50-100 seconds.
            if (random.randint(1, 100) == 42
                    and self.test_engine(t[0].bind)):
                dead.remove(t)
        tables = tables - dead

    # Imported here rather than at the top of the function so the
    # single-table fast path above never touches the import machinery.
    from r2.lib.services import AppServiceMonitor

    # 't' is a list of engines itself. since we assume those engines
    # are on the same machine, just take the first one. len(ips) may be
    # < len(tables) if some tables are on the same host.
    ips = dict((t[0].bind.url.host, t) for t in tables)
    ip_loads = AppServiceMonitor.get_db_load(ips.keys())

    total_load = 0
    missing_loads = []
    no_connections = []
    have_loads = []
    for ip in ips:
        if ip not in ip_loads:
            missing_loads.append(ip)
            continue
        load, avg_load, conns, avg_conns, max_conns = ip_loads[ip]
        # remove high load machines from the pool.
        if load < 100:
            max_load = max(load, avg_load)
            total_load += max_load
            have_loads.append((ip, max_load))
        else:
            no_connections.append(ip)

    if total_load and len(have_loads) > 1:
        # A host's weight is the share of the total load carried by the
        # *other* hosts, so busier hosts receive less traffic.  float()
        # keeps the division exact when the loads are integers.
        ip_weights = [(ip, 1 - load / float(total_load))
                      for ip, load in have_loads]
    else:
        # Nothing is loaded, or only one host reported: the formula above
        # would give a lone host weight 0, so split evenly instead.  With
        # an empty have_loads the comprehension never divides.
        ip_weights = [(ip, 1.0 / len(have_loads)) for ip, load in have_loads]

    # Hosts with no load data get the mean weight of the measured hosts
    # (a weight, not a raw load figure, so the scales match).
    if ip_weights:
        avg_weight = sum(w[1] for w in ip_weights) / len(ip_weights)
    else:
        avg_weight = 1.0
    ip_weights.extend((ip, avg_weight) for ip in missing_loads)
    # add in the over-loaded machines with a 1% weight
    ip_weights.extend((ip, .01) for ip in no_connections)

    # Always rebalance: the cumulative walk below only matches the intended
    # distribution when the weights sum to 1.  "or 1" guards a zero total.
    total_weight = sum(w[1] for w in ip_weights) or 1
    ip_weights = [(ip, weight / total_weight) for ip, weight in ip_weights]

    r = random.random()
    for ip, weight in ip_weights:
        if r < weight:
            return ips[ip]
        r -= weight

    # Only reachable through float rounding or when no host was weighted.
    # NOTE(review): if every table was pruned as dead, ``tables`` is empty
    # here and random.choice raises IndexError -- confirm callers cope.
    # The parenthesised form prints the same text on Python 2 and 3.
    print('yer stupid')
    return random.choice(list(tables))
def get_read_table(self, tables):
    """Pick one entry of ``tables`` at random, biased toward less-loaded hosts.

    Tables whose first engine is listed in ``self.dead`` are skipped, except
    that on each call every dead table is re-tested with
    ``self.test_engine`` with probability ``self.db_dead_reconnect_prob``
    and readmitted if the test succeeds.  The remaining tables are keyed by
    ``t[0].bind.url.host`` (the last table seen wins when several share a
    host) and weighted using the figures returned by
    ``AppServiceMonitor.get_db_load``.

    Args:
        tables: sequence of hashable tables; each must be indexable and its
            first element must expose ``.bind`` with a ``.url.host``.

    Returns:
        ``tables[0]`` when there is exactly one table, otherwise the table
        mapped to the randomly selected host.
    """
    # short-cut for only one element
    if len(tables) == 1:
        return tables[0]

    if self.dead:
        tables = set(tables)
        dead = set(t for t in tables if t[0].bind in self.dead)
        for t in list(dead):
            # TODO: tune the reconnect code.  We have about 1-2
            # requests per second per app, so this should
            # reconnect every 50-100 seconds.
            #
            # random.random() returns a float in [0.0, 1.0).
            # db_dead_reconnect_prob is defined in the ini:
            # 0.01 makes a 1/100 chance of attempting a reconnect,
            # 1.00 always attempts one.
            rand = random.random()
            logger.debug("if {0} < {1} , we are trying to reconnect...".format(rand, self.db_dead_reconnect_prob))
            if rand < self.db_dead_reconnect_prob:
                if self.test_engine(t[0].bind):
                    dead.remove(t)
        # only apply changes to tables if there are changes to apply
        if dead:
            tables = tables - dead

    # Imported here rather than at the top of the function so the
    # single-table fast path above never touches the import machinery.
    from r2.lib.services import AppServiceMonitor

    # 't' is a list of engines itself. since we assume those engines
    # are on the same machine, just take the first one. len(ips) may be
    # < len(tables) if some tables are on the same host.
    ips = dict((t[0].bind.url.host, t) for t in tables)
    ip_loads = AppServiceMonitor.get_db_load(ips.keys())

    total_load = 0
    missing_loads = []
    no_connections = []
    have_loads = []
    for ip in ips:
        if ip not in ip_loads:
            missing_loads.append(ip)
            continue
        load, avg_load, conns, avg_conns, max_conns = ip_loads[ip]
        # remove high load machines from the pool.
        if load < 100:
            max_load = max(load, avg_load)
            total_load += max_load
            have_loads.append((ip, max_load))
        else:
            no_connections.append(ip)

    if total_load and len(have_loads) > 1:
        # A host's weight is the share of the total load carried by the
        # *other* hosts, so busier hosts receive less traffic.  float()
        # keeps the division exact when the loads are integers.
        ip_weights = [(ip, 1 - load / float(total_load))
                      for ip, load in have_loads]
    else:
        # Nothing is loaded, or only one host reported: the formula above
        # would give a lone host weight 0, so split evenly instead.  With
        # an empty have_loads the comprehension never divides.
        ip_weights = [(ip, 1.0 / len(have_loads)) for ip, load in have_loads]

    # Hosts with no load data get the mean weight of the measured hosts
    # (a weight, not a raw load figure, so the scales match).
    if ip_weights:
        avg_weight = sum(w[1] for w in ip_weights) / len(ip_weights)
    else:
        avg_weight = 1.0
    ip_weights.extend((ip, avg_weight) for ip in missing_loads)
    # add in the over-loaded machines with a 1% weight
    ip_weights.extend((ip, .01) for ip in no_connections)

    # Always rebalance: the cumulative walk below only matches the intended
    # distribution when the weights sum to 1.  "or 1" guards a zero total.
    total_weight = sum(w[1] for w in ip_weights) or 1
    ip_weights = [(ip, weight / total_weight) for ip, weight in ip_weights]

    r = random.random()
    for ip, weight in ip_weights:
        if r < weight:
            return ips[ip]
        r -= weight

    # Only reachable through float rounding or when no host was weighted.
    # NOTE(review): if every table was pruned as dead, ``tables`` is empty
    # here and random.choice raises IndexError -- confirm callers cope.
    logger.error(
        "I couldn't find any usable PGSQLs anymore. Maybe it is down "
        "or maybe I just think it is down. Restarting reddit or "
        "postgresql may be a good short-term fix for this. Please "
        "examine the logs more thorougly to attempt to find the time "
        "I lose all record of usable connections and fix whatever "
        "causes this.")
    return random.choice(list(tables))