def testGetGossip(result):
    """
    Check that a get forces a gossip. This test works regardless of the
    LB's hash algorithm because the test directly gets from the DB
    instance, forcing it to merge any pending gossip.
    """
    if active['ndb'] <= 1:
        result({'type': 'TEST_SKIPPED', 'reason': 'Only 1 database (no gossip)'})
        return
    base = 'aardvark'
    first = base + '0'
    vc = VectorClock().update('c0', 1)
    put(first, 1, vc)
    dbId = getDBId(first)
    if dbId == -1:
        result({'type': 'KEY_NOT_SAVED'})
        return
    dbsub = dbSubscriber(dbId)
    for i in range(1, active['digest-length'] + 1):
        item = base + str(i)
        put(item, 1, vc, dbPort(dbId))
    result({'type': 'bool', 'expected': False,
            'got': clients[dbsub].exists(base + '*')})
    rating, _, _ = get('hello', port=dbPort(dbsub))
    result({'type': 'float', 'expected': 0.0, 'got': rating})
    rating, choices, clocks = get(first, port=dbPort(dbsub))
    result({'type': 'float', 'expected': 1.0, 'got': rating})
    result({'type': 'EXPECT_CHOICES', 'expected': [1.0], 'got': choices})
    result({'type': 'EXPECT_CLOCKS', 'expected': [vc.asDict()],
            'got': [c.asDict() for c in clocks]})
def vector_converge(tea_name, r_rating, r_vc):
    if DEBUG:
        print "[new received] tea_name:", tea_name, "rating:", r_rating, "new v clocks:", r_vc
    rating, choices, vc = get_from_redis(tea_name)
    if rating == None:
        # no key found for the tea
        conv_rating = r_rating
        conv_choices = [r_rating]
        conv_vc = r_vc
    else:
        if DEBUG:
            print "[previous] tea_name:", tea_name, "rating:", rating, "choices:", choices, "new v clocks:", vc
        # compare the existing clock and the received clock
        # print "r_vc = ", r_vc.clock, "vc=", vc.clock
        if DEBUG:
            print "[compare] r_vc == vc :", (r_vc == vc)
            print "[compare] r_vc > vc: ", (r_vc > vc)
            print "[compare] r_vc < vc: ", (r_vc < vc)
            print "[compare] r_vc >= vc: ", (r_vc >= vc)
            print "[compare] coalesce = ", VectorClock.coalesce([r_vc, vc])
            print "[compare] converge = ", VectorClock.converge([r_vc, vc])
        if r_vc == vc:
            conv_rating = r_rating
            conv_choices = None
            conv_vc = None
        elif r_vc > vc:
            # more recent data: take the received rating, clock, and choices
            conv_rating = r_rating
            conv_vc = r_vc
            conv_choices = [r_rating]
        elif r_vc < vc:
            # stale data: ignore it and keep the existing values
            if DEBUG:
                print "[ignore] r_vc < vc"
            conv_rating = rating
            conv_vc = None
            conv_choices = None
        else:
            # incomparable clocks: merge the choices and converge the clocks
            combined_clocks_list = VectorClock.coalesce([r_vc, vc])
            if DEBUG:
                print "combined clocks:", combined_clocks_list
            if r_vc in combined_clocks_list and vc in combined_clocks_list:
                combined_clocks_list.sort()
                # merge the choice sets and average them for the new rating
                conv_choices = choices + [r_rating]
                conv_rating = meanAvg(conv_choices)
                # converge the clocks into a single merged clock
                conv_vc = VectorClock.converge([r_vc, vc])
    if DEBUG:
        print "[incom] c_choices=", conv_choices, "c_rating=", conv_rating, "c_vc=", conv_vc
    return conv_rating, conv_choices, conv_vc
def gossip():
    # check whether this node is already gossiping with someone else
    if not shard.gossiping:
        shard.lastToGossip = False
        incoming_data = json.loads(request.get_json())
        shard.gossiping = True
        # causal context of the incoming node trying to gossip
        other_context = incoming_data["causal-context"]
        # key store of the incoming node trying to gossip
        other_kvstore = incoming_data["kv-store"]
        # true if the other node determined this node is the tiebreaker
        tiebreaker = True if incoming_data["tiebreaker"] == ADDRESS else False
        incoming_Vc = VectorClock(view=None, clock=other_context)
        if other_kvstore == shard.keystore:
            shard.gossiping = False
            return jsonify({"message": "We're equal."}), 201
        elif incoming_Vc.allFieldsZero():
            shard.gossiping = False
            return jsonify({"message": "You don't have any data"}), 201
        elif incoming_Vc.selfHappensBefore(shard.VC.__repr__()):
            # this node is at least concurrent with, or after, the sender
            shard.gossiping = False
            return jsonify({"message": "I don't need yours."}), 201
        elif incoming_Vc.__repr__() != shard.VC.__repr__():
            shard.keystore = other_kvstore
            shard.VC.merge(other_context, ADDRESS)
            shard.gossiping = False
            return jsonify({"message": "I took your data"}), 200
    return jsonify({"message": "gossiping"}), 201
def merge_clock(rating, clock, key):
    global client
    # make sure the clock is a VectorClock object first
    if not isinstance(clock, VectorClock) and isinstance(clock, dict):
        clock = VectorClock.fromDict(clock)
    clockDict = clock.asDict()
    # get the hash from redis for this tea
    teaHash = client.hgetall(key)
    # flag so we know whether to just add the new rating or not
    isClientExist = False
    for clockJsonString, redisRating in teaHash.iteritems():
        redisClockDict = json.loads(clockJsonString)
        redisClock = VectorClock.fromDict(redisClockDict)
        # check whether the clock is comparable to one already in redis
        if clock >= redisClock or clock < redisClock:
            isClientExist = True  # no need to create a new entry
            # returns [redisClock], [clock], or [redisClock, clock],
            # meaning stale, recent, or incomparable respectively
            vcl = coalesce(redisClock, clock)
            # cache the membership tests
            redisClockIncluded = redisClock in vcl
            clockIncluded = clock in vcl
            # the incomparable case: include both
            if redisClockIncluded and clockIncluded:
                # the redis (clock, rating) pair is already stored, so just
                # add the new one
                client.hset(key, json.dumps(clockDict), rating)
                # delete the old clock if a client in the new clock is newer
                # than the old client's clock time
                for cli in clockDict:
                    if redisClockDict[cli] < clockDict[cli]:
                        client.hdel(key, json.dumps(redisClockDict))
            # the more recent case: replace the old entry with the new one
            if not redisClockIncluded and clockIncluded:
                client.hdel(key, json.dumps(redisClockDict))
                client.hset(key, json.dumps(clockDict), rating)
    # the client has never rated (not found in the hash), so add it
    if isClientExist == False:
        client.hset(key, json.dumps(clockDict), rating)
    # return the result as {rating, choices, clock}
    final_rating_result = get_final_rating_result(key)
    return final_rating_result
def put(self, key, metadata, value, destnode=None):
    if destnode is None:
        # Pick a random node to send the request to
        destnode = random.choice(DynamoNode.nodelist)
    # Input metadata is always a sequence, but we always need to insert a
    # single VectorClock object into the ClientPut message
    if len(metadata) == 1 and metadata[0] is None:
        metadata = VectorClock()
    else:
        # A Put operation always implies convergence
        metadata = VectorClock.converge(metadata)
    putmsg = ClientPut(self, destnode, key, value, metadata)
    Framework.send_message(putmsg)
    return putmsg
def gossip():
    # check whether this node is already gossiping with someone else
    if not shard.gossiping:
        incoming_data = json.loads(request.get_json())
        shard.gossiping = True
        # causal context of the incoming node trying to gossip
        other_context = incoming_data["context"]
        # key store of the incoming node trying to gossip
        other_kvstore = incoming_data["kv-store"]
        if other_kvstore == shard.keystore:
            shard.gossiping = False
            return jsonify({"message": "We're equal."}), 200
        # true if the other node determined this node is the tiebreaker
        tiebreaker = True if incoming_data["tiebreaker"] == ADDRESS else False
        incoming_Vc = VectorClock(view=None, clock=other_context)
        if shard.VC.selfHappensBefore(other_context):
            # this node happened before the sender: accept the incoming data
            shard.keystore = other_kvstore
            shard.VC.merge(other_context, ADDRESS)
            shard.gossiping = False
            print("I HAPPENED BEFORE, I TAKE YOU" + str(shard.keystore), file=sys.stderr)
            return jsonify({"message": "I took your data"}), 200
        elif incoming_Vc.selfHappensBefore(shard.VC.__repr__()):
            # this node happened after the incoming one, so return its data
            shard.gossiping = False
            return jsonify({
                "message": "I am after you, take my data",
                "context": shard.VC.__repr__(),
                "kv-store": shard.keystore,
            }), 501
        elif tiebreaker:
            shard.gossiping = False
            return jsonify({
                "message": "I am the tiebreaker, take my data",
                "context": shard.VC.__repr__(),
                "kv-store": shard.keystore,
            }), 501
        elif not tiebreaker:
            if bool(other_kvstore) and not incoming_Vc.allFieldsZero():
                shard.keystore = other_kvstore
                shard.VC.merge(other_context, ADDRESS)
                shard.gossiping = False
                print("I DID NOT HAPPEN BEFORE BUT AM NOT THE TIEBREAKER" + str(shard.keystore), file=sys.stderr)
                return jsonify({"message": "I took your data"}), 200
            shard.gossiping = False
    return jsonify({"message": "I am gossiping with someone else"}), 400
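# A minimal, self-contained sketch (not the project's actual VectorClock) of
# the clock operations the gossip handlers above assume: allFieldsZero(),
# selfHappensBefore(), and merge(). The names and semantics are assumptions
# inferred from the call sites, not confirmed by the source.
class SketchVectorClock:
    def __init__(self, view=None, clock=None):
        # start every node in the view at zero, or adopt a given clock dict
        self.clock = dict(clock) if clock else {node: 0 for node in (view or [])}

    def allFieldsZero(self):
        # true when no events have been recorded at any node
        return all(v == 0 for v in self.clock.values())

    def selfHappensBefore(self, other_clock):
        # self happens-before other: <= in every entry and < in at least one
        other = dict(other_clock)
        le = all(v <= other.get(k, 0) for k, v in self.clock.items())
        lt = any(v < other.get(k, 0) for k, v in self.clock.items())
        return le and lt

    def merge(self, other_clock, address):
        # pointwise max of both clocks, then tick the merging node's entry
        for k, v in dict(other_clock).items():
            self.clock[k] = max(self.clock.get(k, 0), v)
        self.clock[address] = self.clock.get(address, 0) + 1

a = SketchVectorClock(view=['n1', 'n2'])
b = SketchVectorClock(view=['n1', 'n2'], clock={'n1': 1, 'n2': 0})
assert a.allFieldsZero() and a.selfHappensBefore(b.clock)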
def put_rating(entity):
    # Check to make sure JSON is ok
    mimetype = mimeparse.best_match(['application/json'], request.headers.get('Accept'))
    if not mimetype:
        return abort(406)
    # Check to make sure the data we're getting is JSON
    if request.headers.get('Content-Type') != 'application/json':
        return abort(415)
    response.headers.append('Content-Type', mimetype)
    # Parse the request
    data = json.load(request.body)
    setrating = data.get('rating')
    setclock = VectorClock.fromDict(data.get('clock'))
    key = '/rating/' + entity
    merge_with_db(setrating, setclock, key)
    sync_with_neighbour_queue(key)
    # grab the results of the merge
    result = get_final_rating_result(key)
    # Return rating
    return {"rating": result["rating"]}
def get(id, ec=False, port=lb_base):
    """
    Get a value. By default, this issues a strongly consistent read through
    the load balancer. Setting ec=True requests an eventually consistent
    read. Setting port to the port of a DB instance does a direct get to
    that instance, bypassing the load balancer.
    """
    headers = {'Accept': 'application/json'}
    url = endpoint(id, port)
    try:
        if ec:
            response = requests.get(url, headers=headers, params={'consistency': 'weak'})
        else:
            response = requests.get(url, headers=headers)
    except Exception as e:
        raise Exception("Invalid request: url %s, exception %s" % (url, e))
    try:
        data = response.json()
    except:
        raise Exception('Unexpected response: %s HTTP %d %s' % (url, response.status_code, response.text))
    try:
        rating = float(data['rating'])
    except:
        rating = data['rating']
    choices = data['choices']
    # TODO: Handle return of malformed vector clocks
    clocks = data['clocks']
    return rating, choices, [VectorClock.fromDict(vcstr) for vcstr in clocks]
def rcv_getrsp(self, getrsp):
    seqno = getrsp.msg_id
    if seqno in self.pending_get_rsp:
        self.pending_get_rsp[seqno].add((getrsp.from_node, getrsp.value, getrsp.metadata))
        if len(self.pending_get_rsp[seqno]) >= DynamoNode.R:
            _logger.info("%s: read %d copies of %s=? so done",
                         self, DynamoNode.R, getrsp.key)
            # _logger.debug("  copies at %s",
            #               [(node.name, value)
            #                for (node, value, _) in self.pending_get_rsp[seqno]])
            # Coalesce all compatible (value, metadata) pairs across the responses
            results = VectorClock.coalesce2([
                (value, metadata)
                for (node, value, metadata) in self.pending_get_rsp[seqno]
            ])
            # Tidy up tracking data structures
            original_msg = self.pending_get_msg[seqno]
            del self.pending_req[GetReq][seqno]
            del self.pending_get_rsp[seqno]
            del self.pending_get_msg[seqno]
            # Reply to the original client, including all received values
            client_getrsp = ClientGetRsp(
                original_msg,
                [value for (value, metadata) in results],
                [metadata for (value, metadata) in results])
            # modified: route the reply over the connection for the target node
            con = self.connections[self.servers.index(client_getrsp.to_node)]
            Framework.send_message(client_getrsp, con)
    else:
        pass  # Superfluous reply
def put(self, key, metadata, value, destnode=None):
    # modified: retry until a node accepts the put
    temp = metadata
    while True:
        metadata = temp
        if destnode is None:
            # Pick a random node to send the request to
            destnode = random.choice(DynamoNode.nodelist)
        # Input metadata is always a sequence, but we always need to insert a
        # single VectorClock object into the ClientPut message
        if len(metadata) == 1 and metadata[0] is None:
            metadata = VectorClock()
        else:
            # A Put operation always implies convergence
            metadata = VectorClock.converge(metadata)
        putmsg = ClientPut(self.addr, destnode, key, value, metadata)
        # modified: send over the connection for the chosen node
        con = self.connections[self.servers.index(destnode)]
        result = Framework.send_message(putmsg, con)
        if result is not False:
            break
        destnode = None
    return result
def rcv_getrsp(self, getrsp):
    seqno = getrsp.msg_id
    if seqno in self.pending_get_rsp:
        self.pending_get_rsp[seqno].add(
            (getrsp.from_node, getrsp.value, getrsp.metadata))
        if len(self.pending_get_rsp[seqno]) >= DynamoNode.R:
            _logger.info("%s: read %d copies of %s=? so done",
                         self, DynamoNode.R, getrsp.key)
            _logger.debug("  copies at %s",
                          [(node.name, value)
                           for (node, value, _) in self.pending_get_rsp[seqno]])
            # Coalesce all compatible (value, metadata) pairs across the responses
            results = VectorClock.coalesce2([
                (value, metadata)
                for (node, value, metadata) in self.pending_get_rsp[seqno]
            ])
            # Tidy up tracking data structures
            original_msg = self.pending_get_msg[seqno]
            del self.pending_req[GetReq][seqno]
            del self.pending_get_rsp[seqno]
            del self.pending_get_msg[seqno]
            # Reply to the original client, including all received values
            client_getrsp = ClientGetRsp(
                original_msg,
                [value for (value, metadata) in results],
                [metadata for (value, metadata) in results])
            Framework.send_message(client_getrsp)
    else:
        pass  # Superfluous reply
def state_transfer():
    data = request.get_json()
    other_vector_clock = data["context"]
    if shard.VC.selfHappensBefore(other_vector_clock):
        shard.keystore = data["kv-store"]
        shard.VC = VectorClock(view=None, clock=other_vector_clock)
    return {"message": "Acknowledged"}, 201
def get_rating(entity):
    print "Called GET on ENTITY", entity
    key = '/rating/' + entity
    hashed_entity = hash(entity)
    db_index = hashed_entity % ndb
    print "entity primary is", db_index
    # Calculate rating
    clocks = {}
    choices = 0
    # merge any pending gossip before reading
    print "doing GOSSIP for read:", clocks
    gossip_protocol()
    table = client.hgetall(key)
    clocks = table.keys()
    choices = table.values()
    print "CLOCKS", clocks
    print "CHOICES:", choices
    print "TYPE OF CLOCKS", type(clocks)
    total = 0
    for i in range(len(choices)):
        choices[i] = float(choices[i])
        total = total + choices[i]
    clocksArr = []
    for clock in clocks:
        print "CLOCK", type(clock)
        print "CLOCK", clock
        clock = VectorClock.fromDict(ast.literal_eval(clock))
        clocksArr.append(clock.asDict())
        print "TYPE OF CLOCK", type(clock.asDict())
    print "CLOCKSARR", clocksArr
    checkDigestList()
    if total > 0:
        finalrating = float(total / len(choices))
        return {"rating": finalrating, "choices": choices, "clocks": clocksArr}
    elif len(clocksArr) > 0:
        return {"rating": 0, "choices": choices, "clocks": clocksArr}
    else:
        return {"rating": 0, "choices": [], "clocks": []}
def put_message(self, fromnode, key, value, metadata):
    metadata = pickle.loads(metadata)
    if metadata is None:
        metadata = VectorClock()
    else:
        # A Put operation always implies convergence
        metadata = VectorClock.converge(metadata)
    putmsg = ClientPut(fromnode, self.addr, key, value, metadata)
    self.rcv_clientput(putmsg)
def checkList(msg, host):
    temp = VectorClock()
    data = getClock(host)
    for (i, j) in data:
        temp.update(i, j)
    for (index, msgi) in enumerate(msg):
        aux = Vector.getCounter(str(index))
        try:
            aux_1 = temp.getCounter(str(index))
            Vector.update(str(index), int(aux_1))
        except:
            continue
        for (i, mes) in enumerate(messages):
            if i == index:
                mes[0] = msgi[0]
def gossip(self):
    if self.gossiping == False:
        self.gossiping = True
        replica_ip_addresses = self.shard_replicas(self.shard_ID)
        replica_index = random.randint(0, len(replica_ip_addresses) - 1)
        while self.shard_ID == replica_index:
            replica_index = random.randint(0, len(replica_ip_addresses) - 1)
        replica = replica_ip_addresses[replica_index]
        tiebreaker = replica if (replica_index > self.shard_ID) else self.ADDRESS
        data = {
            "context": self.VC.__repr__(),
            "kv-store": self.keystore,
            "tiebreaker": tiebreaker
        }
        content, code = self.router.PUT(replica, '/kv-store/internal/gossip/', data, False)
        if code == 200:
            # 200: they took my data
            self.gossiping = False
        elif code == 501:
            # 501: the other node was either the tiebreaker or happened
            # after self, so this node takes its data
            other_context = content["context"]
            other_kvstore = content["kv-store"]
            self.VC = VectorClock(view=None, clock=other_context)
            self.keystore = other_kvstore
            self.gossiping = False
        else:
            # 400: other is already gossiping with someone else
            # ELSE: unresponsive node
            self.gossiping = False
    else:
        # currently gossiping; will retry after the gossip backoff
        self.gossiping = False
def testConverge(self):
    self.c1.update('B', 1)
    c3 = copy.deepcopy(self.c1)
    c4 = copy.deepcopy(self.c1)
    # Diverge two of the clocks
    c3.update('X', 200)
    self.c1.update('Y', 100)
    cx = VectorClockTimestamp.converge((self.c1, self.c2, c3, c4))
    self.assertEquals(str(cx), "{A:1, B:2, X:200, Y:100}")
    cy = VectorClockTimestamp.converge(VectorClock.coalesce((self.c1, self.c2, c3, c4)))
    self.assertEquals(str(cy), "{A:1, B:2, X:200, Y:100}")
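# A hedged sketch of the coalesce/converge semantics the test above relies
# on, using plain dicts rather than the real VectorClock class. coalesce()
# drops clocks dominated by another clock and keeps the incomparable
# survivors; converge() folds everything into one pointwise-maximum clock.
def dominates(a, b):
    # a dominates b when a >= b in every entry
    return all(b.get(k, 0) <= a.get(k, 0) for k in set(a) | set(b))

def coalesce(clocks):
    survivors = []
    for c in clocks:
        if any(dominates(s, c) for s in survivors):
            continue  # c is stale relative to a survivor
        survivors = [s for s in survivors if not dominates(c, s)] + [c]
    return survivors

def converge(clocks):
    out = {}
    for c in clocks:
        for k, v in c.items():
            out[k] = max(out.get(k, 0), v)
    return out

c1 = {'A': 1, 'B': 2, 'Y': 100}
c3 = {'A': 1, 'B': 2, 'X': 200}
assert coalesce([c1, c3, c1]) == [c1, c3]  # the incomparable pair survives
assert converge([c1, c3]) == {'A': 1, 'B': 2, 'X': 200, 'Y': 100}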
def get(self, channel):
    resp = requests.get('http://localhost:' + str(self.port) + '/q/' + channel,
                        headers={'content-type': 'application/json'})
    jresp = resp.json()
    if len(jresp) > 0:
        for k in jresp:
            # values of the form {CLOCK_CODE: [...]} are serialized vector clocks
            if isinstance(jresp[k], dict) and jresp[k].keys() == [CLOCK_CODE]:
                jresp[k] = [VectorClock.fromDict(dc) for dc in jresp[k][CLOCK_CODE]]
        return jresp
    else:
        return None
def put_message(self, fromnode, key, value, metadata):
    metadata = pickle.loads(metadata)
    if metadata is None:
        metadata = VectorClock()
    else:
        # A Put operation always implies convergence
        metadata = VectorClock.converge(metadata)
    putmsg = ClientPut(fromnode, self.addr, key, value, metadata)
    Framework.send_message(putmsg)
    # Framework.schedule(timers_to_process=0)
    Framework.schedule()
def process_request_data(entity, rating, newclocks):
    print "processing request data"
    # Weave the new rating into the current rating list
    if "rating" not in entity:
        ratingkey = '/rating/' + entity
    else:
        ratingkey = entity
    spectator = True
    newClockList = []
    table = client.hgetall(ratingkey)
    clocks = table.keys()
    choices = table.values()
    print "table", table
    print "clocks", clocks
    print "choices", choices
    print "newclocks", newclocks
    print "type of newclocks", type(newclocks)
    if len(clocks) == 0:
        newClockList.append(newclocks)
        print "appending new clock because our clock list is empty. newClockList:", newClockList
    for entry in clocks:
        entry = VectorClock.fromDict(ast.literal_eval(entry))
        if entry < newclocks:
            print "Our current clock is older!", entry, newclocks
            spectator = False
            print "STORING IN REDIS DATABASE", newclocks.asDict(), rating
            client.hset(ratingkey, newclocks.asDict(), rating)
            client.hdel(ratingkey, entry.asDict())
            newClockList.append(newclocks)
        elif newclocks <= entry:
            spectator = False
    if spectator:
        print "spectator", spectator
        for clock in newClockList:
            print "STORING IN REDIS DATABASE", ratingkey, clock.asDict(), rating
            client.hset(ratingkey, clock.asDict(), rating)
    # Return the new rating for the entity
    return rating, newClockList
def put_rating(entity):
    # Check to make sure JSON is ok
    mimetype = mimeparse.best_match(['application/json'], request.headers.get('Accept'))
    if not mimetype:
        return abort(406)
    # Check to make sure the data we're getting is JSON
    if request.headers.get('Content-Type') != 'application/json':
        return abort(415)
    response.headers.append('Content-Type', mimetype)
    # Parse the request
    data = json.load(request.body)
    rating = data.get('rating')
    clock = VectorClock.fromDict(data.get('clock'))
    # Basic sanity checks on the rating
    if isinstance(rating, int):
        rating = float(rating)
    if not isinstance(rating, float):
        return abort(400)
    # Hash the entity to determine its shard
    hashed_entity = hash(entity)
    # find out which database process this entity hashes to
    db_index = hashed_entity % ndb
    print "ENTITY has PRIMARY DB as", db_index, entity
    # direct the request at the port the hashed database listens on
    dbBasePort = 3000 + db_index
    url = 'http://localhost:' + str(dbBasePort) + '/rating/' + entity
    print "URL", url
    # Update the rating
    res = requests.put(url,
                       data=json.dumps({'rating': rating, 'clock': clock.asDict()}),
                       headers={'content-type': 'application/json'})
    # Return the new rating for the entity
    return {"rating": res.json()['rating']}
def get_from_redis(tea_name):
    try:
        data = eval(client.get("tea:%s:json" % tea_name))
    except:
        # no key found for the tea
        return None, None, None
    rating = data["rating"]
    choices = data["choices"]
    clocks = data["clocks"]  # vc.clock, e.g. {'c1': 10, 'c0': 7}
    return rating, choices, VectorClock.fromDict(clocks)
def put_rating(entity):
    print "Called PUT on ENTITY", entity
    hashed_entity = hash(entity)
    db_index = hashed_entity % ndb
    print "entity primary is", db_index
    # Check to make sure JSON is ok
    mimetype = mimeparse.best_match(['application/json'], request.headers.get('Accept'))
    if not mimetype:
        return abort(406)
    # Check to make sure the data we're getting is JSON
    if request.headers.get('Content-Type') != 'application/json':
        return abort(415)
    response.headers.append('Content-Type', mimetype)
    # Parse the request
    data = json.load(request.body)
    setrating = data.get('rating')
    setclock = VectorClock.fromDict(data.get('clock'))
    key = '/rating/' + entity
    finalrating = 0
    newClockList = []
    print "ENTITY", entity
    print "setrating", setrating
    print "setclock", setclock
    print "write rating to REDIS Database:", entity, setrating, setclock
    (finalrating, newClockList) = process_request_data(entity, setrating, setclock)
    print "newClockList:", newClockList
    # record the change in the digest list
    global digest_list
    print "record change in digest_list:", id, key, finalrating, setclock
    for clock in newClockList:
        digest_list.append({"id": id, "key": key, "rating": finalrating, "clock": clock})
    gossip_protocol()
    checkDigestList()
    # Return rating
    return {"rating": finalrating}
def put_rating(entity):
    # Check to make sure JSON is ok
    mimetype = mimeparse.best_match(['application/json'], request.headers.get('Accept'))
    if not mimetype:
        return abort(406)
    # Check to make sure the data we're getting is JSON
    if request.headers.get('Content-Type') != 'application/json':
        return abort(415)
    response.headers.append('Content-Type', mimetype)
    # Parse the request
    data = json.load(request.body)
    if DEBUG:
        print "[put_rating]", data
    received_rating = data.get('rating')
    received_vc = VectorClock.fromDict(data.get('clocks'))
    # Basic sanity checks on the rating
    if isinstance(received_rating, int):
        received_rating = float(received_rating)
    if not isinstance(received_rating, float):
        return abort(400)
    # Weave the new rating into the current rating list
    key = '/rating/' + entity
    tea_name = entity
    # compute the mean rating after converging the existing and received values
    finalrating, choices, new_vc_list = vector_converge(tea_name, received_rating, received_vc)
    # set the rating, choices, and clocks in the database for this key
    if choices != None:
        put_to_redis(tea_name, finalrating, choices, new_vc_list)  # store new score
    # Return rating
    return {"rating": finalrating}
def get(id):
    headers = {'Accept': 'application/json'}
    url = endpoint + '/rating/' + id
    try:
        request = requests.get(url, headers=headers)
        data = request.json()
    except:
        raise Exception('Invalid request: %s HTTP %d %s' % (url, request.status_code, request.text))
    try:
        rating = float(data['rating'])
    except:
        rating = data['rating']
    choices = json.loads(data['choices'])
    # TODO: Handle return of malformed vector clocks
    clocks = json.load(StringIO.StringIO(data['clocks']))
    return rating, choices, [VectorClock.fromDict(vcstr) for vcstr in clocks]
def put_rating(entity):
    # Check to make sure JSON is ok
    mimetype = mimeparse.best_match(['application/json'], request.headers.get('Accept'))
    if not mimetype:
        return abort(406)
    # Check to make sure the data we're getting is JSON
    if request.headers.get('Content-Type') != 'application/json':
        return abort(415)
    response.headers.append('Content-Type', mimetype)
    # Parse the request
    data = json.load(request.body)
    rating = data.get('rating')
    clock = VectorClock.fromDict(data.get('clock'))
    query_param_dict = parse_qs(urlparse(request.url).query, keep_blank_values=True)
    # Basic sanity checks on the rating
    if isinstance(rating, int):
        rating = float(rating)
    if not isinstance(rating, float):
        return abort(400)
    # Writes are not supposed to be eventually consistent, so always route
    # to the consistent shard:
    # if 'consistency' in query_param_dict.keys() and query_param_dict['consistency'] == 'weak':
    #     shard_number = get_shard_number(entity)
    # else:
    #     shard_number = get_shard_number(entity, consistent=True)
    dbBasePort = get_shard_number(entity, consistent=True)
    url = 'http://localhost:' + str(dbBasePort) + '/rating/' + entity
    res = requests.put(url,
                       data=json.dumps({'rating': rating, 'clock': clock.asDict()}),
                       headers={'content-type': 'application/json'})
    # Return the new rating for the entity
    return {"rating": res.json()['rating']}
def gossip_protocol():
    # first check the channel to see if there is anything in there
    isItemInChannel = True
    newClockList = []
    global queue
    global id
    global digest_list
    while isItemInChannel:
        # TODO: remember to do this only if ndb > 1
        # read from the neighbour's channel (the ring predecessor)
        if id == 0:
            msg = queue.get(str(ndb - 1))
        else:
            msg = queue.get(str(id - 1))
        if not msg:
            isItemInChannel = False
        print "isItemInChannel", isItemInChannel
        if isItemInChannel:
            print "MSG", msg
            if id != msg['id']:
                ratingValue = msg['rating']
                key = msg['key']
                clock = msg['clock']
                # turn the clock retrieved off the channel into a VectorClock
                clock = VectorClock.fromDict(clock)
                print "writing this key retrieved off the channel to REDIS", key, ratingValue, clock
                (ratingValue, newClockList) = process_request_data(key, ratingValue, clock)
                print "append these values to this instance's digest_list, for later gossip to its neighbour:", ratingValue, clock
                digest_list.append({"id": id, "key": key, "rating": ratingValue, "clock": clock})
def put_rating(entity):
    # Check to make sure JSON is ok
    mimetype = mimeparse.best_match(['application/json'], request.headers.get('Accept'))
    if not mimetype:
        return abort(406)
    # Check to make sure the data we're getting is JSON
    if request.headers.get('Content-Type') != 'application/json':
        return abort(415)
    response.headers.append('Content-Type', mimetype)
    # Read the data sent from the client
    data = json.load(request.body)
    received_rating = data.get('rating')
    received_vc = VectorClock.fromDict(data.get('clocks'))
    # Basic sanity checks on the rating
    if isinstance(received_rating, int):
        received_rating = float(received_rating)
    if not isinstance(received_rating, float):
        return abort(400)
    # Weave the new rating into the current rating list
    key = '/rating/' + entity
    tea_name = entity
    # compute the mean rating after converging the existing and received values
    finalrating, choices, new_vc = vector_converge(tea_name, received_rating, received_vc)
    # set the rating, choices, and clocks in the database for this key
    if choices != None:
        put_to_redis(tea_name, finalrating, choices, new_vc)  # store new score
    # Return the new rating for the entity
    return {"rating": finalrating}
def put_rating(entity):
    # Check to make sure JSON is ok
    mimetype = mimeparse.best_match(['application/json'], request.headers.get('Accept'))
    if not mimetype:
        return abort(406)
    # Check to make sure the data we're getting is JSON
    if request.headers.get('Content-Type') != 'application/json':
        return abort(415)
    response.headers.append('Content-Type', mimetype)
    # Parse the request
    data = json.load(request.body)
    rating = data.get('rating')
    clock = VectorClock.fromDict(data.get('clock'))
    # Basic sanity checks on the rating
    if isinstance(rating, int):
        rating = float(rating)
    if not isinstance(rating, float):
        return abort(400)
    # hash the entity to determine its shard, and target that shard's port
    url = 'http://localhost:' + str(dbBasePort + hashEntity(entity, ndb)) + '/rating/' + entity
    # Update the rating
    res = requests.put(url,
                       data=json.dumps({'rating': rating, 'clock': clock.asDict()}),
                       headers={'content-type': 'application/json'})
    # Return the new rating for the entity
    return {"rating": res.json()['rating']}
def convert2vc_list(json_clocks_list):
    # parse a serialized list of clock dicts into VectorClock objects
    return [VectorClock.fromDict(clocks) for clocks in eval(json_clocks_list)]
class Node(KV_store):
    '''docstring for node class'''

    def __init__(self, router, address, view, replication_factor):
        KV_store.__init__(self, address)
        self.gossipScheduled = False
        self.lastToGossip = False
        self.gossiping = False
        self.history = [('Initialized', datetime.now())]
        self.ADDRESS = address
        self.VC = VectorClock(view=view, clock=None)
        # parameter for hash mod value
        self.ring_edge = 691 if len(view) < 100 else 4127
        self.repl_factor = replication_factor
        self.num_shards = 0
        self.virtual_range = 16
        self.shard_interval = self.ring_edge // self.virtual_range
        self.nodes = []
        self.shard_ID = -1
        self.V_SHARDS = []  # store all virtual shards
        self.P_SHARDS = [[] for i in range(0, self.num_shards)]  # map physical shards to nodes
        self.virtual_translation = {}  # map virtual shards to physical shards
        self.backoff_mod = 59
        self.router = router
        self.view_change(view, replication_factor)

    def __repr__(self):
        return {
            'ADDRESS': self.ADDRESS,
            'shard_ID': self.shard_ID,
            'P_SHARDS': self.P_SHARDS,
            'KEYS': self.keystore
        }

    def __str__(self):
        return ('ADDRESS: ' + self.ADDRESS +
                '\nshard_ID: ' + str(self.shard_ID) +
                '\n: ' + (', '.join(map(str, self.keystore))) +
                '\nP_SHARDS: ' + (', '.join(map(str, self.P_SHARDS))))

    '''
    give a state report;
    this includes node data and the distribution of keys to nodes
    '''
    def state_report(self):
        state = self.__repr__()
        state['HISTORY'] = {}
        string = 'node'
        itr = 1
        for event in self.history:
            key = string + str(itr)
            itr += 1
            state['HISTORY'][key] = event
        return state

    '''
    return all physical shards
    '''
    def all_shards(self):
        return self.P_SHARDS

    def all_nodes(self):
        return self.nodes

    '''
    get all nodes in this shard
    '''
    def shard_replicas(self, shard_ID):
        return self.P_SHARDS[shard_ID]

    '''
    hash function is a composite of xxhash modded by a prime
    '''
    def hash(self, key):
        hash_val = hasher.xxh32(key).intdigest()
        # may be expensive but will produce a better distribution
        return (hash_val % self.ring_edge)

    '''
    evenly distribute nodes into num_shard buckets
    '''
    def even_distribution(self, repl_factor, nodes):
        nodes.sort()
        num_shards = (len(nodes) // repl_factor)
        replicas = (len(nodes) // num_shards)
        overflow = (len(nodes) % num_shards)
        shards = [[] for i in range(0, num_shards)]
        node_iter = 0
        for shard in range(num_shards):
            extra = (1 if shard < overflow else 0)
            interval = replicas + extra
            shards[shard] = nodes[node_iter:(node_iter + interval)]
            node_iter += interval
        return shards

    '''
    Perform a key operation, i.e. find the correct shard given a key.
    First hash the key, then perform a binary search to find the correct
    shard to store the key.
    '''
    def find_match(self, key):
        ring_val = self.hash(key)
        # get the virtual shard number
        v_shard = self.find_shard('predecessor', ring_val)
        # convert to physical shard
        shard_ID = self.virtual_translation[v_shard]
        return shard_ID

    '''
    perform binary search on the list of virtual shards given a ring value;
    we need to be careful about the wrap-around case: if
    ring_val >= max_ring_val, return 0
    '''
    def find_shard(self, direction, ring_val):
        if direction == 'predecessor':
            v_shard = bisect_left(self.V_SHARDS, ring_val)
            if v_shard:
                return self.V_SHARDS[v_shard - 1]
            return self.V_SHARDS[-1]
        elif direction == 'successor':
            v_shard = bisect_right(self.V_SHARDS, ring_val)
            if v_shard != len(self.V_SHARDS):
                return self.V_SHARDS[v_shard]
            return self.V_SHARDS[0]

    '''
    respond to a view change request and perform a reshard;
    this can only be done if all nodes have been given the new view
    2 cases:
    1. len(nodes) + 1 // r > or < shard_num: we need to add or remove a
       shard to maintain repl_factor
    2. add and/or remove nodes
    '''
    def view_change(self, view, repl_factor):
        new_num_shards = len(view) // repl_factor
        # we should always have more than one shard
        if new_num_shards == 1:
            new_num_shards = 2
        buckets = self.even_distribution(repl_factor, view)
        # add nodes and shards
        for shard in range(len(buckets)):
            if self.ADDRESS in buckets[shard]:
                self.shard_ID = shard
            if shard >= len(self.P_SHARDS):
                self.add_shard(buckets[shard])
            else:
                self.update_shard(buckets[shard], shard)
        # remove empty shards
        for shard_ID in range(len(buckets), len(self.P_SHARDS)):
            self.remove_shard(shard_ID)
        for old_node in list(set(self.nodes) - set(view)):
            self.nodes.pop(self.nodes.index(old_node))

    '''
    add shard to view
    '''
    def add_shard(self, nodes):
        new_shards = []
        p_shard = self.num_shards
        if p_shard >= len(self.P_SHARDS):
            self.P_SHARDS.append([])
        # if nodes are new, add them to self.nodes and self.P_SHARDS
        for node in nodes:
            if node not in self.nodes:
                self.nodes.append(node)
            if node not in self.P_SHARDS[p_shard]:
                self.P_SHARDS[p_shard].append(node)
        # create virtual shards
        for v_shard in range(self.virtual_range):
            virtual_shard = str(p_shard) + str(v_shard)
            ring_num = self.hash(virtual_shard)  # unique value on the 'ring'
            # if ring_num is already in the list, skip this iteration
            if ring_num in self.V_SHARDS:
                continue
            self.V_SHARDS.append(ring_num)
            self.virtual_translation[ring_num] = p_shard
            successor = self.find_shard('successor', ring_num)
            predecessor = self.find_shard('predecessor', ring_num)
            # send the appropriate keys to v_shard
            if self.virtual_translation[predecessor] == self.shard_ID:
                self.atomic_key_transfer(predecessor, ring_num, successor)
        self.num_shards += 1
        self.V_SHARDS.sort()

    def update_shard(self, nodes, shard_ID):
        for node in self.P_SHARDS[shard_ID]:
            if node not in nodes:
                # must be getting deleted or moved
                if node == self.ADDRESS:
                    # self is getting deleted
                    for new_node in nodes:
                        print('moving keys to', new_node, file=sys.stderr)
                        success = self.final_state_transfer(new_node)
                self.P_SHARDS[shard_ID].pop(self.P_SHARDS[shard_ID].index(node))
        for node in nodes:
            if node not in self.nodes:
                self.nodes.append(node)
            if node not in self.P_SHARDS[shard_ID]:
                self.P_SHARDS[shard_ID].append(node)

    '''
    remove from all internal data structures if there are no nodes in the shard
    '''
    def remove_shard(self, shard_ID):
        self.P_SHARDS.pop(shard_ID)

    '''
    transfer keys from self to the new shard, according to consistent
    hashing rules
    '''
    def atomic_key_transfer(self, predecessor, new_shard, successor):
        for key in list(self.keystore):
            key_hash = self.hash(key)
            if key_hash < successor and key_hash > predecessor:
                shard_destination = self.virtual_translation[new_shard]
                replicas = self.shard_replicas(shard_destination)
                path = '/kv-store/keys/' + str(key) + '/forward'
                data = {'value': self.keystore[key], 'causal-context': {}}
                data = json.loads(json.dumps(data))
                status_code = 400
                print('I should definitely send', key, 'to', replicas, file=sys.stderr)
                # try to message all replicas; if a replica is unresponsive, ignore it
                for replica in replicas:
                    if replica in self.P_SHARDS[self.shard_ID]:
                        continue
                    print('sending', key, 'to', replica, file=sys.stderr)
                    try:
                        res = self.router.PUT(replica, path, data)
                        status_code = 201
                    except:
                        continue
                # at least one replica responded
                if status_code < 400:
                    print('deleting key from my keystore', file=sys.stderr)
                    del self.keystore[key]
                    if key in self.keystore:
                        print('there was a problem deleting the key', file=sys.stderr)
                else:
                    print('replicas did not respond when transferring keys', file=sys.stderr)

    '''
    send the final state of this node before it is removed
    '''
    def final_state_transfer(self, node):
        data = {
            "kv-store": self.keystore,
            "causal-context": self.VC.__repr__()
        }
        replica_ip_addresses = self.shard_replicas(self.shard_ID)
        for replica in replica_ip_addresses:
            if replica != self.ADDRESS:
                try:
                    res = self.router.PUT(replica, '/kv-store/internal/state-transfer', data)
                except:
                    continue
                if res.status_code == 201:
                    return True
        return False

    '''
    handle node failures, check whether the node should be removed or not
    '''
    def handle_unresponsive_node(self, node):
        pass

    def gossip_backoff(self):
        return hash(self.ADDRESS) % random.randint(5, 15)

    def gossip(self):
        if (self.gossiping == False) and (not self.lastToGossip) and (self.repl_factor > 1):
            self.lastToGossip = True
            current_key_store = self.keystore
            self.gossiping = True
            replica_ip_addresses = self.shard_replicas(self.shard_ID)
            replica = replica_ip_addresses[random.randint(0, len(replica_ip_addresses) - 1)]
            while self.ADDRESS == replica:
                replica = replica_ip_addresses[random.randint(0, len(replica_ip_addresses) - 1)]
            # break ties by the last octet of each address
            myNumber = int((self.ADDRESS.split(".")[3]).split(":")[0])
            otherNumber = int((replica.split(".")[3]).split(":")[0])
            tiebreaker = replica if (otherNumber > myNumber) else self.ADDRESS
            data = {
                "causal-context": self.VC.__repr__(),
                "kv-store": current_key_store,
                "tiebreaker": tiebreaker
            }
            try:
                response = self.router.PUT(replica, '/kv-store/internal/gossip/', json.dumps(data))
                code = response.status_code
            except:
                code = -1
            self.gossiping = False
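# A small standalone sketch of the ring lookup used by Node.find_shard above,
# assuming (as the class does) a sorted list of virtual-shard ring positions.
# This is illustrative only; V_SHARDS and the positions below are hypothetical
# stand-ins for the real node state.
from bisect import bisect_left, bisect_right

V_SHARDS = [42, 191, 388, 540, 655]  # hypothetical sorted ring positions

def find_shard(direction, ring_val):
    if direction == 'predecessor':
        i = bisect_left(V_SHARDS, ring_val)
        # wrap around: anything below the first position belongs to the last
        return V_SHARDS[i - 1] if i else V_SHARDS[-1]
    elif direction == 'successor':
        i = bisect_right(V_SHARDS, ring_val)
        # wrap around: anything at or past the last position goes to the first
        return V_SHARDS[i] if i != len(V_SHARDS) else V_SHARDS[0]

assert find_shard('predecessor', 200) == 191
assert find_shard('predecessor', 10) == 655  # wraps to the end of the ring
assert find_shard('successor', 655) == 42    # wraps to the start of the ring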
def update(self, node, counter):
    VectorClock.update(self, node, counter)
    self.clock_time[node] = time.time()
    self._maybe_truncate()
    return self
messages = []
servers_list = []
know_servers = []
layoutx = []
layouty = []
x = 0
y = 0
mylink = None
NO_ERROR = 200
Vector = VectorClock()


@get('/')
@view('index')
def index():
    return {'messages': messages, 'layoutx': layoutx, 'layouty': layouty, 'users': servers_list}


# Receives the message and synchronizes it with the list, also updating the
# vector clock for each position of the table
@post('/send')
def sendMessage():
    global x
class FrontEnd:
    '''
    Class for a Front End Server within the distributed system.
    '''

    def __init__(self):
        self.servers = []
        self.rm = None
        # Initial selection of replica manager to communicate with
        try:
            self.rm = self._choose_replica()
        except ValueError as e:
            print(e)
        self.ts = VectorClock(REPLICA_NUM)  # Vector timestamp of front end

    def send_request(self, request):
        '''
        Method invoked by a client to send a request.

        Params:
            (tuple) request: command to execute and arguments for the command

        Returns:
            If the request is a query, the results of the query;
            otherwise a confirmation message.
        '''
        r_type = self._request_type(request)
        # Find a replica manager to send the request to if the original is
        # unavailable
        if self.rm is not None:
            try:
                rm_status = self.rm.get_status()
                print(rm_status)
                if rm_status == Status.OFFLINE.value:
                    self.rm = self._choose_replica()
            except Pyro4.errors.ConnectionClosedError:
                self.rm = self._choose_replica()
        else:
            self.rm = self._choose_replica()
        if r_type == RType.UPDATE:
            rm_ts = self.rm.send_update(request, self.ts.value(), str(uuid.uuid4()))
            print('Update sent: ', request)
            self.ts.merge(VectorClock.fromiterable(rm_ts))
            print('Front end timestamp: ', self.ts.value())
            return 'Update submitted!'
        elif r_type == RType.QUERY:
            val, rm_ts = self.rm.send_query(request, self.ts.value())
            print('Query sent: ', request)
            self.ts.merge(VectorClock.fromiterable(rm_ts))
            print('Front end timestamp: ', self.ts.value())
            return val

    def _choose_replica(self):
        '''
        Select a replica manager to communicate with.

        Returns:
            Remote object for a replica manager
        '''
        for server in self.servers:
            server._pyroRelease()
        self.servers = self._find_replicas()
        stat = {server: server.get_status() for server in self.servers}
        available = []
        num_offline = list(stat.values()).count(Status.OFFLINE.value)
        num_active = list(stat.values()).count(Status.ACTIVE.value)
        if num_active > 0:
            available = [k for k in stat.keys() if stat[k] == Status.ACTIVE.value]
        elif num_offline == len(self.servers):
            raise Exception('All servers offline')
        else:
            available = [k for k in stat.keys() if stat[k] != Status.OFFLINE.value]
        if not available:
            return None
        return random.choice(available)

    @staticmethod
    def _request_type(request):
        '''
        Determine whether a request is an update or a query.

        Params:
            (tuple) request: request to check

        Returns:
            Enum representing the type of request
        '''
        op = request[0]
        op_type = op.split('.')[0]
        if op_type == 'u':
            return RType.UPDATE
        if op_type == 'q':
            return RType.QUERY
        raise ValueError('command not recognised')

    @staticmethod
    def _find_replicas():
        '''
        Find all online replica managers.

        Returns:
            servers: list of remote server objects for replica managers
        '''
        servers = []
        with Pyro4.locateNS() as ns:
            for server, uri in ns.list(prefix="network.replica.").items():
                print("found replica", server)
                servers.append(Pyro4.Proxy(uri))
        if not servers:
            raise ValueError("No servers found! (are the movie servers running?)")
        return servers[:REPLICA_NUM]
def vector_converge(tea_name, r_rating, r_vc):
    conv_list = []
    if DEBUG:
        print "\n[new received] tea_name:", tea_name, "rating:", r_rating, "new v clocks:", r_vc
    rating, choices, vc_list = get_from_redis(tea_name)
    if rating == None:
        # no key found for the tea
        conv_rating = r_rating
        conv_choices = [r_rating]
        conv_list.append(r_vc)
    else:
        vc = merge_dict(vc_list)
        coal = VectorClock.coalesce([vc, r_vc])
        conv = VectorClock.converge([vc, r_vc])
        if DEBUG:
            print "[previous] vc_list to vc format:", vc
            print "[previous] tea_name:", tea_name, "rating:", rating, "choices:", choices
            print "[compare] r_vc == vc :", (r_vc == vc)
            print "[compare] r_vc > vc: ", (r_vc > vc)
            print "[compare] r_vc < vc: ", (r_vc < vc)
            print "[compare] r_vc >= vc: ", (r_vc >= vc)
            print "[compare] coalesce = ", coal
            print "[compare] converge = ", conv
            print "[compare] r_vc in v.coalesce = ", r_vc in coal
            print "[compare] vc in v.coalesce = ", vc in coal
            print "[compare] r_vc in v.converge = ", r_vc in seperate_to_vc_list(conv)
            print "[compare] vc in v.converge = ", vc in seperate_to_vc_list(conv)
            print "[compare] r_vc <= v.converge ", r_vc <= conv
            print "[compare] vc <= v.converge ", vc <= conv
        is_incomparable = (r_vc in coal) and (vc in coal) and (r_vc <= conv) and (vc <= conv)
        if r_vc == vc:
            conv_rating = r_rating
            conv_choices = None
            conv_list = None
        elif r_vc > vc:
            # more recent data: take the received rating, clock, and choices
            conv_rating = r_rating
            conv_choices = [r_rating]
            conv_list.append(r_vc)
        elif r_vc < vc:
            # stale data: ignore it and keep the existing values
            if DEBUG:
                print "[ignore] r_vc < vc"
            conv_rating = rating
            conv_choices = None
            conv_list = None
        elif is_incomparable:
            combined_clocks_list = VectorClock.coalesce([vc, r_vc])
            if DEBUG:
                print "combined clocks:", combined_clocks_list
                print "----- [incom] r_vc:", r_vc
                print "----- [incom] vc:", vc
                print "----- [incom] coal:", coal
                print "----- [incom] conv:", conv
                print "----- [incom] choices:", choices, ", r_choices:", r_rating
            conv_vc_list = seperate_to_vc_list(conv)
            # find the choices associated with conv_list
            conv_choices = []
            conv_list = []
            # check the previous vc list
            conv_list, conv_choices = decide_to_append_vc_list(vc_list, choices, conv_vc_list, conv_list, conv_choices)
            # check the received r_vc list
            conv_list, conv_choices = decide_to_append_vc_list([r_vc], [r_rating], conv_vc_list, conv_list, conv_choices)
            # double check
            conv_list, conv_choices = eliminate_old_clocks(conv_list, conv_choices)
            conv_rating = meanAvg(conv_choices)
            if DEBUG:
                print "----- [incomp] new conv_choices:", conv_choices
                print "----- [incomp] new conv_list:", conv_list
                print "----- [incomp] new conv_rating:", conv_rating
    return conv_rating, conv_choices, conv_list
def gossip(self):
    if self.gossiping == False:
        current_key_store = self.keystore
        self.gossiping = True
        replica_ip_addresses = self.shard_replicas(self.shard_ID)
        replica = replica_ip_addresses[random.randint(0, len(replica_ip_addresses) - 1)]
        while self.ADDRESS == replica:
            replica = replica_ip_addresses[random.randint(0, len(replica_ip_addresses) - 1)]
        # break ties by the last octet of each address
        myNumber = int((self.ADDRESS.split(".")[3]).split(":")[0])
        otherNumber = int((replica.split(".")[3]).split(":")[0])
        tiebreaker = replica if (otherNumber > myNumber) else self.ADDRESS
        data = {
            "context": self.VC.__repr__(),
            "kv-store": current_key_store,
            "tiebreaker": tiebreaker
        }
        print("sending to node: " + replica + " " + str(data), file=sys.stderr)
        try:
            response = self.router.PUT(replica, '/kv-store/internal/gossip/', json.dumps(data))
            code = response.status_code
        except:
            code = -1
        if code == 200:
            # 200: they took my data
            self.gossiping = False
        elif code == 501:
            # 501: the other node was either the tiebreaker or happened
            # after self, so this node takes its data
            content = response.json()
            # context and key store of the node gossiped with
            other_context = content["context"]
            other_kvstore = content["kv-store"]
            incoming_Vc = VectorClock(view=None, clock=other_context)
            if bool(other_kvstore) and not incoming_Vc.allFieldsZero():
                if current_key_store == self.keystore:
                    print("I TOOK DATA: " + str(self.keystore), file=sys.stderr)
                    self.VC.merge(other_context, self.ADDRESS)
                    self.keystore = other_kvstore
                else:
                    print("I RECEIVED AN UPDATE WHILE GOSSIPING, ABORT", file=sys.stderr)
            self.gossiping = False
        else:
            # 400: other is already gossiping with someone else
            # ELSE: unresponsive node
            self.gossiping = False
    else:
        # currently gossiping; will retry after the gossip backoff
        self.gossiping = False
    return 200
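# A minimal sketch of the tiebreaker rule used in gossip() above: the replica
# whose address has the larger last octet (before the port) wins concurrent
# merges. The addresses below are hypothetical.
def pick_tiebreaker(mine, other):
    my_octet = int(mine.split(".")[3].split(":")[0])
    other_octet = int(other.split(".")[3].split(":")[0])
    return other if other_octet > my_octet else mine

assert pick_tiebreaker("10.0.0.2:8080", "10.0.0.5:8080") == "10.0.0.5:8080"
assert pick_tiebreaker("10.0.0.7:8080", "10.0.0.5:8080") == "10.0.0.7:8080"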
# Imports needed by this class; the project-local modules (mylogger,
# distributedboard, and the VectorClock class) are assumed to be importable
# from the surrounding project.
import json
import time
import concurrent.futures
from threading import Thread

import requests
from bottle import Bottle, request, template, redirect, static_file


class Server(Bottle):
    def __init__(self, ID, IP, servers_list):
        super(Server, self).__init__()
        self.id = int(ID)
        self.ip = str(IP)
        self.servers_list = servers_list
        self.serverIndex = self.servers_list.index(self.ip)
        self.vectorClock = VectorClock(self.serverIndex, len(self.servers_list))
        self.myLogger = mylogger.Logger(self.ip)
        self.blackboard = distributedboard.Blackboard(self.vectorClock, self.myLogger)
        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)

        # List all REST URIs.
        # If you add new URIs to the server, you need to add them here.
        # API for clients
        self.route('/', callback=self.index)
        self.get('/board', callback=self.get_board)
        self.post('/', callback=self.post_index)
        self.post('/board', callback=self.post_board)
        self.post('/board/<number>/', callback=self.post_board_ID)
        # API for scripts
        self.get('/serverlist', callback=self.get_serverlist)
        self.get('/board/alldata', callback=self.get_board_data)
        # API for server internals
        self.post('/propagate', callback=self.post_propagate)
        self.post('/update_ctrl', callback=self.post_update_ctrl)
        # Give access to the template elements.
        self.get('/templates/<filename:path>', callback=self.get_template)
        # You can have variables in the URI, for example:
        # self.post('/board/<element_id:int>/', callback=self.post_board)
        # where post_board takes an integer argument called element_id.

        # Lab 3 optional task
        self.get('/operation_log_size', callback=self.get_operation_log_size)

        thread = Thread(target=self.checkUpdatesOfOtherServers)
        thread.start()

    def operateOnUncommittedLog(self, logList):
        for x in logList:
            self.myLogger.addToQueue("Uncommitted operation " + str(x))
            self.blackboard.propagateContent(x["Operation"], x["element"])
            time.sleep(0.5)
        self.myLogger.addToQueue("Done sending to queue")

    def post_update_ctrl(self):
        # request.body is a file-like object; read it whole rather than
        # iterating it line by line, which breaks on multi-line JSON.
        parsedItem = json.loads(request.body.read())
        self.myLogger.addToQueue("Inside post_update_ctrl " + str(parsedItem["msgType"]))
        if parsedItem["msgType"] == "Request For History":
            self.myLogger.addToQueue("VclockList = " + str(parsedItem["VclockList"]))
            complementedLog = self.blackboard.getAll_ComplementedLOg(parsedItem["VclockList"])
            payload = {
                "msgType": "Uncommitted History",
                "historyList": complementedLog
            }
            self.executor.submit(self.contact_another_server, parsedItem["ip"],
                                 "/update_ctrl", "POST", dataToSend=json.dumps(payload))
        elif parsedItem["msgType"] == "Uncommitted History":
            self.myLogger.addToQueue("Try to add all: " + str(parsedItem["historyList"]))
            historyToCommit = parsedItem["historyList"]
            self.executor.submit(self.operateOnUncommittedLog, historyToCommit)
        else:
            # Unknown message type: do nothing.
            pass

    def get_operation_log_size(self):
        return json.dumps(self.blackboard.getOperationLogSize())

    def checkUpdatesOfOtherServers(self):
        # For each peer, count how many consecutive polls returned the same
        # log size; after three stable reads showing the peer is ahead,
        # request the history we are missing.
        check_count = [0] * len(self.servers_list)
        server_logCount = [0] * len(self.servers_list)
        time.sleep(10)
        while True:
            for i in range(0, len(self.servers_list)):
                x = self.servers_list[i]
                if x == self.ip:
                    continue
                try:
                    res = requests.get('http://{}{}'.format(x, '/operation_log_size'), timeout=5)
                    ownHistorySize = self.blackboard.getOperationLogSize()
                    otherHistorySize = json.loads(res.content)
                    self.myLogger.addToQueue("Own history size: " + str(ownHistorySize) +
                                             " <--> " + x + ": server's history size " +
                                             str(otherHistorySize))
                    if check_count[i] == 0:
                        check_count[i] += 1
                        server_logCount[i] = otherHistorySize
                    else:
                        if server_logCount[i] == otherHistorySize:
                            check_count[i] += 1
                        else:
                            check_count[i] = 0
                            server_logCount[i] = 0
                    if check_count[i] == 3:
                        if ownHistorySize < server_logCount[i]:
                            # After three consistent reads, the other server
                            # definitely has more history than we do, so ask
                            # it for the un-propagated operations.
                            self.myLogger.addToQueue("I am ready to get data from " + x)
                            payload = {
                                "ip": self.ip,
                                "msgType": "Request For History",
                                "VclockList": self.blackboard.getAll_Operation_Vclocks()
                            }
                            self.executor.submit(self.contact_another_server, x,
                                                 "/update_ctrl", "POST",
                                                 dataToSend=json.dumps(payload))
                            time.sleep(10)
                            for j in range(0, len(self.servers_list)):
                                check_count[j] = 0
                                server_logCount[j] = 0
                            break
                        else:
                            check_count[i] = 0
                            server_logCount[i] = 0
                except requests.Timeout:
                    self.myLogger.addToQueue("checkUpdatesOfOtherServers timeout for " + str(x))
                    check_count[i] = 0
                    server_logCount[i] = 0
                except requests.ConnectionError:
                    self.myLogger.addToQueue("checkUpdatesOfOtherServers connection error for " + str(x))
                    check_count[i] = 0
                    server_logCount[i] = 0
            time.sleep(10)

    def contact_another_server(self, srv_ip, URI, req='POST', dataToSend=None):
        # Try to contact another server through a POST or GET.
        # Usage: server.contact_another_server("10.1.1.1", "/index", "POST", params_dict)
        success = False
        try:
            if 'POST' in req:
                res = requests.post('http://{}{}'.format(srv_ip, URI), data=dataToSend)
            elif 'GET' in req:
                res = requests.get('http://{}{}'.format(srv_ip, URI))
            if res.status_code == 200:
                success = True
        except Exception as e:
            print("[ERROR] " + str(e))
        return success

    def propagate_to_all_servers(self, URI, req='POST', dataToSend=None):
        for srv_ip in self.servers_list:
            if srv_ip != self.ip:  # don't propagate to yourself
                self.executor.submit(self.contact_another_server, srv_ip, URI, req, dataToSend)

    # route to ('/')
    def generateDataToShow(self):
        boardData = self.blackboard.get_content()
        self.myLogger.addToQueue("time diff for operation ===========> " +
                                 str(self.blackboard.get_operation_time_diff()))
        self.myLogger.addToQueue(str(self.vectorClock.getCurrentClock()))
        customList = []
        for x in boardData:
            customList.append((x["id"], x["entry"], x["vclock"]))
        return customList

    def index(self):
        return template('server/templates/index.tpl',
                        board_title='Server {} ({}) Server Clock ({}) '.format(
                            self.id, self.ip, self.vectorClock.getCurrentClock()),
                        board_dict=self.generateDataToShow(),
                        members_name_string='INPUT YOUR NAME HERE')

    # get on ('/board')
    def get_board(self):
        return template('server/templates/blackboard.tpl',
                        board_title='Server {} ({}) Server Clock ({}) '.format(
                            self.id, self.ip, self.vectorClock.getCurrentClock()),
                        board_dict=self.generateDataToShow())

    # get on ('/serverlist')
    def get_serverlist(self):
        return json.dumps(self.servers_list)

    # get all messages
    def get_board_data(self):
        return json.dumps(self.blackboard.get_content())

    # post on ('/board'): add a new entry
    def post_board(self):
        newEntry = request.forms.get('entry')
        self.myLogger.addToQueue('post_board: ' + newEntry)
        addedItem = self.blackboard.add_content(newEntry)
        payload = {
            "Operation": "add",
            "Element": addedItem,
        }
        self.propagate_to_all_servers(URI="/propagate", req="POST",
                                      dataToSend=json.dumps(payload))

    # post on ('/board/<number>/'): modify or delete an entry
    def post_board_ID(self, number):
        option = request.forms.get('delete')
        modified_entry = request.forms.get('entry')
        if option == "1":
            self.myLogger.addToQueue('DELETE: ' + str(number))
            deletedItem = self.blackboard.delete_content(number)
            self.propagate_to_all_servers(URI="/propagate", req="POST",
                                          dataToSend=json.dumps({
                                              "Operation": "delete",
                                              "Element": deletedItem
                                          }))
        else:
            self.myLogger.addToQueue('MODIFY: ' + str(number) + " => " + modified_entry)
            modifiedItem = self.blackboard.set_content(number, modified_entry)
            self.propagate_to_all_servers(URI="/propagate", req="POST",
                                          dataToSend=json.dumps({
                                              "Operation": "modify",
                                              "Element": modifiedItem
                                          }))
        redirect('/')

    # post on ('/propagate')
    def post_propagate(self):
        parsedItem = json.loads(request.body.read())
        self.blackboard.propagateContent(parsedItem["Operation"], parsedItem["Element"])

    # post on ('/')
    def post_index(self):
        try:
            # Read the POST form and check for an element called 'entry'.
            new_entry = request.forms.get('entry')
        except Exception as e:
            print("[ERROR] " + str(e))

    def get_template(self, filename):
        return static_file(filename, root='./server/templates/')

    def remove_server(self, ipToRemove):
        if ipToRemove in self.servers_list:
            self.servers_list.remove(ipToRemove)
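# A minimal sketch of driving the history-reconciliation protocol above from
# outside, assuming two Server instances are already running at the
# hypothetical addresses below and expose the routes registered in __init__.
# A lagging server would normally trigger the same exchange itself from
# checkUpdatesOfOtherServers.
import json
import requests

SERVER_A = '10.1.0.1:80'  # hypothetical address
SERVER_B = '10.1.0.2:80'  # hypothetical address

# Compare operation-log sizes the same way checkUpdatesOfOtherServers does.
size_a = json.loads(requests.get('http://{}/operation_log_size'.format(SERVER_A), timeout=5).content)
size_b = json.loads(requests.get('http://{}/operation_log_size'.format(SERVER_B), timeout=5).content)

if size_a < size_b:
    # Ask B for the operations A has not seen; B replies asynchronously by
    # POSTing an "Uncommitted History" message back to A's /update_ctrl.
    payload = {
        "ip": SERVER_A,
        "msgType": "Request For History",
        "VclockList": []  # A would send self.blackboard.getAll_Operation_Vclocks()
    }
    requests.post('http://{}/update_ctrl'.format(SERVER_B), data=json.dumps(payload))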
# Relies on names defined elsewhere in the module: Pyro4, queue, random,
# threading, VectorClock, Status, ROp, REPLICA_NUM, and the movie query/update
# helpers referenced in _parse_q_op/_parse_u_op.
class ReplicaManager(threading.Thread):
    '''
    Class for a replica server within the distributed system, implementing
    the gossip architecture.
    '''

    def __init__(self, replica_id, stopper, status=None):
        super().__init__()
        self._id = replica_id

        # Replica status properties
        self.failure_prob = 0.1
        self.overload_prob = 0.2
        self.auto_status = True
        if status is not None and status in [s.value for s in Status]:
            self.status = Status(status)
            self.auto_status = False
            print(f'Status set to {status}.', 'Automatic status updating disabled.')
        else:
            if status is not None:
                print('Invalid status provided, defaulting to active.')
            self.status = Status.ACTIVE

        # Gossip architecture state
        self.value_ts = VectorClock(REPLICA_NUM)    # aka data timestamp
        self.replica_ts = VectorClock(REPLICA_NUM)  # aka log timestamp
        self.update_log = []
        self.ts_table = [
            VectorClock(REPLICA_NUM) if i != self._id else None
            for i in range(REPLICA_NUM)
        ]
        self.executed = []
        self.pending_queries = queue.Queue()
        self.query_results = {}
        self.interval = 8.0  # interval between gossip exchanges
        self.other_replicas = self._find_replicas()
        self.stopper = stopper  # used to tell the server to stop

        # Locks for objects shared between threads
        self.vts_lock = threading.Lock()  # for value_ts
        self.rts_lock = threading.Lock()  # for replica_ts
        self.log_lock = threading.Lock()  # for update_log

    def run(self):
        '''
        Override of threading.Thread run() method. Sends gossip to the other
        replica managers periodically.
        '''
        while not self.stopper.is_set():
            if self.status != Status.OFFLINE:
                for r_id, rm in self.other_replicas:
                    rm._pyroRelease()
                self.other_replicas = self._find_replicas()

                with self.rts_lock:
                    print('\n--- SENDING GOSSIP ---')
                    for r_id, rm in self.other_replicas:
                        r_ts = self.ts_table[r_id]
                        m_log = self._get_recent_updates(r_ts)
                        print(f'Updates to send to RM {r_id}: ', m_log)
                        try:
                            rm.send_gossip(m_log, self.replica_ts.value(), self._id)
                            print(f'Gossip sent to RM {r_id}')
                        except Pyro4.errors.CommunicationError:
                            print(f'Failed to send gossip to RM {r_id}')
                    print('----------------------')

            if self.auto_status:
                self._update_status()
                print('Status: ', self.status.value, '\n')

            self.stopper.wait(self.interval)

        print('Stopper set, gossip thread stopping.')

    def send_query(self, q_op, q_prev):
        '''
        Method invoked by the front end to send a query.

        Params:
            (string) q_op: query command
            (tuple) q_prev: vector timestamp of the front end

        Returns:
            response: results of the query
        '''
        print('Query received: ', q_op, q_prev)
        response = None
        q_prev = VectorClock.fromiterable(q_prev)

        # Stable = are we up to date enough to answer the query correctly?
        stable = False
        with self.vts_lock:
            if q_prev <= self.value_ts:  # stability criterion for queries
                val = self._apply_query(q_op)
                new = self.value_ts.value()
                response = (val, new)
                stable = True
                print('Value timestamp: ', self.value_ts.value(), '\n')

        if not stable:
            # Not yet stable: park the query and wait for it to be answered
            # after some future gossip exchange.
            self.query_results[(q_op, q_prev.value())] = queue.Queue(maxsize=1)
            self.pending_queries.put((q_op, q_prev))
            response = self.query_results[(q_op, q_prev.value())].get()
            # Remove the entry from the pending-query dictionary.
            del self.query_results[(q_op, q_prev.value())]

        return response

    def send_update(self, u_op, u_prev, u_id):
        '''
        Method invoked by the front end to send an update.

        Params:
            (string) u_op: update command
            (tuple) u_prev: vector timestamp of the front end
            (string) u_id: unique ID for the update

        Returns:
            ts: timestamp representing having executed the update, or None
                if the update has already been executed
        '''
        print('Update received: ', u_op, u_prev, u_id)
        ts = None

        # Add the update to the log if it hasn't already been executed.
        if u_id not in self.executed:
            with self.rts_lock:
                self.replica_ts.increment(self._id)
                ts = list(u_prev[:])
                ts[self._id] = self.replica_ts.value()[self._id]
                print('Replica timestamp: ', self.replica_ts, '\n')

            ts = VectorClock.fromiterable(ts)
            u_prev = VectorClock.fromiterable(u_prev)
            log_record = (self._id, ts, u_op, u_prev, u_id)
            with self.log_lock:
                self.update_log.append(log_record)

            print('Update record: ', log_record)

            # Execute the update immediately if it is already stable.
            with self.vts_lock:
                if u_prev <= self.value_ts:  # stability criterion for updates
                    self._execute_update(u_op, u_id, ts)

            return ts.value()

        return ts  # None: update already executed

    @Pyro4.oneway
    def send_gossip(self, m_log, m_ts, r_id):
        '''
        Method invoked by other replica managers to send gossip. Oneway call:
        no value is returned to the sender.

        Params:
            (list) m_log: recent updates from the sending replica manager
            (tuple) m_ts: log timestamp of the sending replica manager
            (int) r_id: ID of the sending replica manager
        '''
        if self.status != Status.OFFLINE:
            print('\n--- RECEIVING GOSSIP ---')
            print(f'Gossip received from RM {r_id}')
            print(m_ts)
            print(m_log)
            print()

            # Merge m_log into our update log.
            self._merge_update_log(m_log)

            # Merge our replica timestamp with m_ts.
            m_ts = VectorClock.fromiterable(m_ts)
            with self.rts_lock:
                self.replica_ts.merge(m_ts)
                print('Replica timestamp: ', self.replica_ts)

            # Execute all updates that have now become stable.
            stable = self._get_stable_updates()
            for update in stable:
                _id, ts, u_op, u_prev, u_id = update
                with self.vts_lock:
                    self._execute_update(u_op, u_id, ts)

            # Record the sending replica manager's timestamp in our
            # timestamp table.
            self.ts_table[r_id] = m_ts

            # Answer all pending queries that are now stable; requeue the
            # rest so they are retried after the next gossip exchange.
            still_pending = []
            while True:
                try:
                    q_op, q_prev = self.pending_queries.get(block=False)
                    with self.vts_lock:
                        if q_prev <= self.value_ts:
                            val = self._apply_query(q_op)
                            new = self.value_ts.value()
                            self.query_results[(q_op, q_prev.value())].put((val, new))
                        else:
                            still_pending.append((q_op, q_prev))
                except queue.Empty:
                    break
            for q in still_pending:
                self.pending_queries.put(q)
            print('------------------------')

    def get_status(self):
        '''
        Method invoked by the front end to query the server status.

        Returns:
            status of the server
        '''
        return self.status.value

    def set_status(self, status):
        '''
        Method invoked by status_control.py to set the server status.
        '''
        self.status = Status(status)

    def toggle_auto_status(self, auto):
        '''
        Method invoked by status_control.py to enable or disable automatic
        status updating.
        '''
        self.auto_status = bool(auto)

    def _update_status(self):
        '''
        Set the server status probabilistically.
        '''
        overloaded = random.random()
        failed = random.random()
        if failed < self.failure_prob:
            self.status = Status.OFFLINE
        elif overloaded < self.overload_prob:
            self.status = Status.OVERLOADED
        else:
            self.status = Status.ACTIVE

    def _apply_query(self, q_op):
        '''
        Execute a query command.

        Params:
            (string) q_op: query command to execute

        Returns:
            val: result of the query
        '''
        print('Query applied. ', q_op, '\n')
        op, *params = q_op
        query = self._parse_q_op(op)
        val = query(*params)
        return val

    def _apply_update(self, u_op):
        '''
        Execute an update command.

        Params:
            (string) u_op: update command to execute
        '''
        print('Update applied.', u_op, '\n')
        op, *params = u_op
        update = self._parse_u_op(op)
        update(*params)

    def _execute_update(self, u_op, u_id, ts):
        '''
        Execute an update.

        Params:
            (string) u_op: update command to execute
            (string) u_id: ID of the update to execute
            (VectorClock) ts: timestamp of the update to execute
        '''
        # Return immediately if the update has already been executed.
        if u_id in self.executed:
            return

        self._apply_update(u_op)    # execute the update
        self.value_ts.merge(ts)     # update the value timestamp
        self.executed.append(u_id)  # record the update as executed
        print('Value timestamp: ', self.value_ts)

    def _merge_update_log(self, m_log):
        '''
        Merge the update log with updates from a gossip message.

        Params:
            m_log: list of updates from a gossip message
        '''
        for record in m_log:
            _id, ts, u_op, u_prev, u_id = record
            ts = VectorClock.fromiterable(ts)
            u_prev = VectorClock.fromiterable(u_prev)
            with self.rts_lock, self.log_lock:
                new_record = (_id, ts, u_op, u_prev, u_id)
                if new_record not in self.update_log:
                    # Only add records not already reflected in our replica
                    # timestamp.
                    if not ts <= self.replica_ts:
                        self.update_log.append(new_record)

    def _get_stable_updates(self):
        '''
        Retrieve all stable updates from the update log.

        Returns:
            stable: list of updates that can be executed
        '''
        stable = []
        with self.vts_lock, self.log_lock:
            stable = [
                record for record in self.update_log
                if record[3] <= self.value_ts
            ]
        # Vector timestamps are only partially ordered; this sort relies on
        # the stable records being mutually comparable.
        stable.sort(key=lambda r: r[3])
        return stable

    def _get_recent_updates(self, r_ts):
        '''
        Retrieve updates from the update log that are more recent than our
        recorded timestamp for another replica manager.

        Params:
            (VectorClock) r_ts: timestamp of another replica manager, as
                recorded in our timestamp table

        Returns:
            recent: all updates from the update log more recent than the
                given timestamp
        '''
        recent = []
        with self.log_lock:
            for record in self.update_log:
                _id, ts, u_op, u_prev, u_id = record
                if ts > r_ts:
                    new_record = (_id, ts.value(), u_op, u_prev.value(), u_id)
                    recent.append(new_record)
        return recent

    def _find_replicas(self):
        '''
        Find all online replica managers registered with the Pyro nameserver.

        Returns:
            servers: list of (id, proxy) pairs for the other replica managers
        '''
        servers = []
        try:
            with Pyro4.locateNS() as ns:
                for server, uri in ns.list(prefix="network.replica.").items():
                    server_id = int(server.split('.')[-1])
                    if server_id != self._id:
                        servers.append((server_id, Pyro4.Proxy(uri)))
        except Pyro4.errors.NamingError:
            print('Could not find Pyro nameserver.')
        servers.sort()
        return servers[:REPLICA_NUM]

    @staticmethod
    def _parse_q_op(op):
        '''
        Match query command strings with query functions.

        Params:
            (string) op: query command

        Returns:
            function corresponding to the query command
        '''
        return {
            ROp.GET_AVG_RATING.value: get_avg_movie_rating,
            ROp.GET_RATINGS.value: get_movie_ratings,
            ROp.GET_GENRES.value: get_movie_genres,
            ROp.GET_MOVIE.value: get_movie_by_title,
            ROp.GET_TAGS.value: get_movie_tags,
            ROp.SEARCH_TITLE.value: search_by_title,
            ROp.SEARCH_GENRE.value: search_by_genre,
            ROp.SEARCH_TAG.value: search_by_tag
        }[op]

    @staticmethod
    def _parse_u_op(op):
        '''
        Match update command strings with update functions.

        Params:
            (string) op: update command

        Returns:
            function corresponding to the update command
        '''
        return {
            ROp.ADD_RATING.value: submit_rating,
            ROp.ADD_TAG.value: submit_tag
        }[op]
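# ReplicaManager leans on a VectorClock type with fromiterable(), value(),
# increment(), merge(), and partial-order comparisons. This is a minimal
# sketch of that assumed interface, not the project's actual implementation.
class VectorClockSketch:
    def __init__(self, size):
        self.clock = [0] * size

    @classmethod
    def fromiterable(cls, it):
        vc = cls(0)
        vc.clock = list(it)
        return vc

    def value(self):
        return tuple(self.clock)

    def increment(self, i):
        self.clock[i] += 1

    def merge(self, other):
        # element-wise maximum of the two clocks
        self.clock = [max(a, b) for a, b in zip(self.clock, other.clock)]

    def __le__(self, other):
        # partial order: every entry <= the corresponding entry
        return all(a <= b for a, b in zip(self.clock, other.clock))

    def __gt__(self, other):
        return other <= self and self.clock != other.clock

# e.g. (0, 1, 0) <= (1, 1, 0) is True, while (1, 0, 0) and (0, 1, 0) are
# incomparable: neither <= holds, which is exactly the "concurrent" case the
# gossip stability checks must handle.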
def merge_dict(vc_list):
    # Join each clock's '"id": count' items into one dict literal and merge
    # them into a single VectorClock. Assuming asItemString() emits
    # JSON-compatible pairs, json.loads is a safer parser than the original
    # eval().
    json_dict = "{%s}" % ", ".join(vc.asItemString() for vc in vc_list)
    return VectorClock.fromDict(json.loads(json_dict))
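# Example use of merge_dict, assuming VectorClock.asItemString() renders a
# clock as '"id": count' (e.g. '"c0": 1') and fromDict() accepts the merged
# mapping; both names are taken from the code above. Note that if two clocks
# share an id, the later item silently wins (duplicate keys in the dict
# literal), so this only merges cleanly when the ids are disjoint.
# vc_a = VectorClock().update('c0', 1)   # clock {"c0": 1}
# vc_b = VectorClock().update('c1', 2)   # clock {"c1": 2}
# merged = merge_dict([vc_a, vc_b])      # clock {"c0": 1, "c1": 2}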
from message_struct import MessageStruct
from client_socket import SocketDynamoClient
import logging, os
from flask import Flask, request, jsonify, Response  # Flask itself was missing from this import
from flask import render_template
from consistentHashing import preference_list
from creating_bucket import create_bucket
from client_socket import bucket_creation_1, create_down_system, share_v_clock
from vectorclock import VectorClock
import json, random

peers_file_path = '/home/HDUSER/clouda2/peers.txt'
gossip_file_path = '/home/HDUSER/clouda2/gossip.txt'
vec = VectorClock()


def createApp():
    resp_data = {}
    app = Flask(__name__)
    file_handler = logging.FileHandler('server.log')
    app.logger.addHandler(file_handler)
    app.logger.setLevel(logging.INFO)

    PROJECT_HOME = os.path.dirname(os.path.realpath(__file__))
    UPLOAD_FOLDER = '{}/uploads/'.format(PROJECT_HOME)
    UPLOAD_BUCKET_FOLDER = '/home/HDUSER/clouda2/uploads/'
    app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

    if not os.path.exists(peers_file_path):
# Relies on names defined elsewhere in the module: KV_store, Scheduler,
# VectorClock, hasher (xxhash), bisect_left/bisect_right, datetime, json,
# random, and sys.
class Node(KV_store):
    '''Node in a sharded, replicated key-value store.'''

    def __init__(self, router, address, view, replication_factor):
        self.gossiping = False
        self.sched = Scheduler()
        self.sched.start()
        KV_store.__init__(self, address)
        self.history = [('Initialized', datetime.now())]
        self.ADDRESS = address
        self.VC = VectorClock(view=view, clock=None)
        self.ring_edge = 691 if len(view) < 100 else 4127  # parameter for hash mod value
        self.repl_factor = replication_factor
        self.num_shards = 0
        self.virtual_range = 10
        self.shard_interval = self.ring_edge // self.virtual_range
        self.nodes = []
        self.shard_ID = -1
        self.V_SHARDS = []  # store all virtual shards
        self.P_SHARDS = [[] for i in range(0, self.num_shards)]  # map physical shards to nodes
        self.virtual_translation = {}  # map virtual shards to physical shards
        self.backoff_mod = 113

        self.router = router
        self.view_change(view, replication_factor)

    def __repr__(self):
        # Note: returns a dict rather than a string; state_report() relies
        # on this.
        return {
            'ADDRESS': self.ADDRESS,
            'V_SHARDS': self.V_SHARDS,
            'P_SHARDS': self.P_SHARDS,
            'KEYS': len(self.keystore)
        }

    def __str__(self):
        return ('ADDRESS: ' + self.ADDRESS +
                '\nREPL_F: ' + str(self.repl_factor) +
                '\nNODES: ' + (', '.join(map(str, self.nodes))) +
                '\nP_SHARDS: ' + (', '.join(map(str, self.P_SHARDS))))

    '''
    give a state report: includes node data and the distribution of keys to
    nodes
    '''
    def state_report(self):
        state = self.__repr__()
        state['HISTORY'] = {}
        string = 'node'
        itr = 1
        for event in self.history:
            key = string + str(itr)
            itr += 1
            state['HISTORY'][key] = event
        return state

    '''
    return all physical shards
    '''
    def all_shards(self):
        return self.P_SHARDS

    def all_nodes(self):
        return self.nodes

    '''
    get all nodes in this shard
    '''
    def shard_replicas(self, shard_ID):
        return self.P_SHARDS[shard_ID]

    '''
    hash function is a composite of xxhash modded by a prime
    '''
    def hash(self, key, Type):
        hash_val = hasher.xxh32(key).intdigest()
        # may be expensive but will produce a better distribution
        return (hash_val % self.ring_edge)

    '''
    evenly distribute nodes into num_shard buckets
    '''
    def even_distribution(self, repl_factor, nodes):
        nodes.sort()
        num_shards = (len(nodes) // repl_factor)
        replicas = (len(nodes) // num_shards)
        overflow = (len(nodes) % num_shards)

        shards = [[] for i in range(0, num_shards)]
        shard_dict = {}
        node_iter = 0
        for shard in range(num_shards):
            extra = (1 if shard < overflow else 0)
            interval = replicas + extra
            shards[shard] = nodes[node_iter:(node_iter + interval)]
            node_iter += interval
            for node in shards[shard]:
                shard_dict[node] = shard
        return shard_dict

    '''
    Perform a key operation, i.e. find the correct shard given a key.
    First hash the key, then perform a binary search to find the correct
    shard to store the key.
    '''
    def find_match(self, key):
        ring_val = self.hash(key, 'consistent')
        # get the virtual shard number
        v_shard = self.find_shard('predecessor', ring_val)
        # convert to physical shard
        shard_ID = self.virtual_translation[v_shard]
        return shard_ID

    '''
    perform binary search on the list of virtual shards given a ring value;
    we need to be careful about the wrap-around case: a value before the
    first shard wraps to the last (predecessor), and a value past the last
    shard wraps to the first (successor)
    '''
    def find_shard(self, direction, ring_val):
        if direction == 'predecessor':
            v_shard = bisect_left(self.V_SHARDS, ring_val)
            if v_shard:
                return self.V_SHARDS[v_shard - 1]
            return self.V_SHARDS[-1]
        elif direction == 'successor':
            v_shard = bisect_right(self.V_SHARDS, ring_val)
            if v_shard != len(self.V_SHARDS):
                return self.V_SHARDS[v_shard]
            return self.V_SHARDS[0]

    '''
    respond to a view change request and perform a reshard; this can only be
    done once all nodes have been given the new view. 2 cases:
    1. (len(nodes) + 1) // r is greater or less than shard_num: we need to
       add or remove a shard to maintain repl_factor
    2. add and/or remove nodes
    '''
    def view_change(self, view, repl_factor):
        new_num_shards = len(view) // repl_factor
        if new_num_shards == 1:
            new_num_shards = 2
        view.sort()
        buckets = self.even_distribution(repl_factor, view)

        # add nodes and shards
        for node in view:
            my_shard = buckets[node]

            if node == self.ADDRESS:
                self.shard_ID = buckets[node]
                self.sched.add_interval_job(self.gossip, seconds=self.gossip_backoff())

            # add a new node
            if node not in self.nodes:
                self.add_node(node, my_shard, new_num_shards)
            # move an existing node to its new shard
            else:
                if my_shard >= len(self.P_SHARDS):
                    self.add_shard()
                if node not in self.P_SHARDS[my_shard]:
                    self.move_node(node, my_shard)

        old_nodes = list(set(self.nodes) - set(view))

        # remove nodes absent from the new view
        for node in old_nodes:
            self.remove_node(node)

        # remove empty shards; iterate in reverse so pops don't shift the
        # indices we have yet to visit
        for shard_ID in reversed(range(len(self.P_SHARDS))):
            if len(self.P_SHARDS[shard_ID]) == 0:
                self.remove_shard(shard_ID)

    '''
    add a single node to shards and get keys from shard replicas
    '''
    def add_node(self, node, shard_ID, num_shards):
        # do we need to add another shard before adding nodes?
        while num_shards > self.num_shards:
            self.add_shard()

        # update internal data structures
        self.nodes.append(node)
        self.nodes.sort()
        self.P_SHARDS[shard_ID].append(node)

        # if the node joined this shard, pick up its keys
        if self.shard_ID == shard_ID:
            self.shard_keys()

    '''
    move a node from its old shard to a new shard and perform an atomic key
    transfer
    '''
    def move_node(self, node, shard_ID):
        old_shard_ID = self.nodes.index(node) // self.num_shards
        if node not in self.P_SHARDS[old_shard_ID]:
            if old_shard_ID > 0 and node in self.P_SHARDS[old_shard_ID - 1]:
                old_shard_ID -= 1
            else:
                old_shard_ID += 1

        # do we need to add another shard before adding nodes?
        # (>= so the target index exists before we append to it)
        while shard_ID >= len(self.P_SHARDS):
            self.add_shard()

        self.atomic_key_transfer(old_shard_ID, shard_ID, node)
        self.P_SHARDS[shard_ID].append(node)
        self.P_SHARDS[old_shard_ID].remove(node)

    '''
    remove a single node from its shard and send the final state to the
    shard replicas
    '''
    def remove_node(self, node):
        shard_ID = (self.nodes.index(node) - 1) // self.num_shards
        if 0 < shard_ID < len(self.P_SHARDS) and node not in self.P_SHARDS[shard_ID]:
            if shard_ID > 0 and node in self.P_SHARDS[shard_ID - 1]:
                shard_ID -= 1
            else:
                shard_ID += 1

        if node == self.ADDRESS:
            # send my final state to my replicas before leaving
            success = self.final_state_transfer(node)
            if success:
                self.nodes.remove(node)
            else:
                raise Exception('<final_state_transfer failed>')
        else:
            self.nodes.remove(node)

        self.P_SHARDS[shard_ID].remove(node)

    '''
    add a shard to the view
    '''
    def add_shard(self):
        new_shards = []
        p_shard = self.num_shards
        if p_shard >= len(self.P_SHARDS):
            self.P_SHARDS.append([])

        for v_shard in range(self.virtual_range):
            virtual_shard = str(p_shard) + str(v_shard)
            ring_num = self.hash(virtual_shard, 'consistent')  # unique value on the ring

            # if ring_num is already on the ring, skip this iteration
            if ring_num in self.V_SHARDS:
                continue

            self.V_SHARDS.append(ring_num)
            self.virtual_translation[ring_num] = p_shard

        self.num_shards += 1
        self.V_SHARDS.sort()
        return new_shards

    '''
    remove a shard from all internal data structures if it has no nodes
    '''
    def remove_shard(self, shard_ID):
        self.P_SHARDS.pop(shard_ID)

    '''
    get all keys for a given shard
    '''
    def shard_keys(self):
        pass

    '''
    perform an atomic key transfer
    concurrent operation: get new keys, send old keys, delete old keys
    '''
    def atomic_key_transfer(self, old_shard_ID, new_shard_ID, node):
        # tell my replicas my state, then fetch keys from the old replicas
        self.final_state_transfer(node)

        old_kv = dict(self.keystore)  # snapshot so we can delete safely below
        for replica in self.P_SHARDS[old_shard_ID]:
            data = None
            try:
                res, status_code = self.router.GET(replica, '/kv-store/internal/KV', data, False)
            except Exception:
                continue
            if status_code == 201:
                new_kv = res.get('KV_store')
                for key in new_kv:
                    self.keystore[key] = new_kv[key]
                for key in old_kv:
                    del self.keystore[key]
                return True
        return False

    '''
    send the final state of a node before it is removed
    '''
    def final_state_transfer(self, node):
        data = {
            "kv-store": self.keystore,
            "context": self.VC.__repr__()
        }
        replica_ip_addresses = self.shard_replicas(self.shard_ID)
        for replica in replica_ip_addresses:
            if replica != self.ADDRESS:
                try:
                    res, status_code = self.router.PUT(
                        replica, '/kv-store/internal/state-transfer', data, False)
                except Exception:
                    continue
                if status_code == 201:
                    return True
        return False

    '''
    handle node failures: check whether the node should be removed or not
    '''
    def handle_unresponsive_node(self, node):
        pass

    def gossip_backoff(self):
        return hash(self.ADDRESS) % random.randint(20, 40)

    def gossip(self):
        if self.gossiping:
            # already mid-gossip; clear the flag so the next scheduled call
            # can proceed after the gossip backoff
            self.gossiping = False
            return 200

        # snapshot the keystore so we can detect concurrent updates below
        current_key_store = dict(self.keystore)
        self.gossiping = True
        replica_ip_addresses = self.shard_replicas(self.shard_ID)
        replica = random.choice(replica_ip_addresses)
        while self.ADDRESS == replica:
            replica = random.choice(replica_ip_addresses)

        # deterministic tiebreaker: the replica with the higher host number wins
        myNumber = int((self.ADDRESS.split(".")[3]).split(":")[0])
        otherNumber = int((replica.split(".")[3]).split(":")[0])
        tiebreaker = replica if (otherNumber > myNumber) else self.ADDRESS

        data = {
            "context": self.VC.__repr__(),
            "kv-store": current_key_store,
            "tiebreaker": tiebreaker
        }
        print("sending to node: " + replica + " " + str(data), file=sys.stderr)
        try:
            response = self.router.PUT(replica, '/kv-store/internal/gossip/', json.dumps(data))
            code = response.status_code
        except Exception:
            code = -1  # unresponsive node

        if code == 200:
            # 200: they took my data
            self.gossiping = False
        elif code == 501:
            # 501: the other node was either the tiebreaker or happened
            # after self, so this node takes its data (self happened before
            # the other, or they were concurrent and the other won the
            # tiebreak)
            content = response.json()
            other_context = content["context"]   # context of the other node
            other_kvstore = content["kv-store"]  # key store of the gossiping node
            incoming_Vc = VectorClock(view=None, clock=other_context)
            if bool(other_kvstore) and not incoming_Vc.allFieldsZero():
                if current_key_store == self.keystore:
                    print("I TOOK DATA: " + str(self.keystore), file=sys.stderr)
                    self.VC.merge(other_context, self.ADDRESS)
                    self.keystore = other_kvstore
                else:
                    print("I RECEIVED AN UPDATE WHILE GOSSIPING, ABORT", file=sys.stderr)
            self.gossiping = False
        else:
            # 400: the other node is already gossiping with someone else;
            # anything else: unresponsive node
            self.gossiping = False
        return 200
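# The predecessor lookup in find_shard is the core of the consistent-hash
# ring. A standalone sketch of the same bisect logic, using a toy ring (the
# positions are made up):
from bisect import bisect_left

ring = [100, 250, 400, 600]  # sorted virtual-shard positions on the ring

def predecessor(ring_val):
    i = bisect_left(ring, ring_val)
    if i:
        return ring[i - 1]
    return ring[-1]  # wrap around: values before the first shard map to the last

assert predecessor(300) == 250  # falls between 250 and 400
assert predecessor(50) == 600   # wraps around to the end of the ring
assert predecessor(600) == 400  # bisect_left: an exact hit returns the previous shard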
def put_rating(entity):
    # Check that the client accepts JSON
    accept_type = mimeparse.best_match(['application/json'],
                                       request.headers.get('Accept'))
    if not accept_type:
        return abort(406)

    # Check to make sure the data we're getting is JSON
    if request.headers.get('Content-Type') != 'application/json':
        return abort(415)

    response.headers.append('Content-Type', accept_type)

    # Read the data sent from the client
    data = json.load(request.body)
    setrating = data.get('rating')
    setclock = VectorClock.fromDict(data.get('clocks'))

    # Basic sanity checks on the rating
    if isinstance(setrating, int):
        setrating = float(setrating)
    if not isinstance(setrating, float):
        return abort(400)

    # Weave the new rating into the current rating list, stored in a Redis
    # hash (HGET/HSET) under the fields 'rating', 'choices', and 'clocks'.
    key = '/rating/' + entity
    old_rating = client.hget(key, 'rating')

    if not old_rating:
        # No rating yet: store the rating, choices, and clocks directly.
        # Lists are serialized with str() so they round-trip through the
        # eval() calls below.
        client.hset(key, 'rating', setrating)
        client.hset(key, 'choices', str([setrating]))
        client.hset(key, 'clocks', jsonify_vcl([setclock]))
        finalrating = setrating
    else:
        finalrating = float(old_rating)
        choices = eval(client.hget(key, 'choices'))
        vcl = eval(client.hget(key, 'clocks'))
        new_vcl = []
        new_choices = []
        greaterThanAlreadyFound = False
        needToUpdateDB = True
        for i in range(0, len(vcl)):
            old_clock = VectorClock.fromDict(vcl[i])
            # If the received clock is older than (or equal to) an existing
            # clock, nothing needs updating.
            if setclock <= old_clock:
                needToUpdateDB = False
                break
            elif setclock > old_clock:
                # The received clock dominates this one. Replace the first
                # dominated clock with the new one; any further clocks it
                # dominates are simply dropped.
                if not greaterThanAlreadyFound:
                    greaterThanAlreadyFound = True
                    new_vcl.append(setclock)
                    new_choices.append(setrating)
            else:
                # Incomparable (concurrent) clock: keep it and its choice.
                new_vcl.append(old_clock)
                new_choices.append(choices[i])

        # Update the DB only if the received clock is not older than or the
        # same as any of the existing clocks.
        if needToUpdateDB:
            # If the received clock did not dominate any existing clock, it
            # is concurrent with all of them: add it alongside.
            if not greaterThanAlreadyFound:
                new_vcl.append(setclock)
                new_choices.append(setrating)

            # The new rating is the mean of the surviving choices.
            ratingSum = 0.0
            for choice in new_choices:
                ratingSum += choice
            finalrating = ratingSum / len(new_choices)

            # Update the DB
            client.hset(key, 'rating', finalrating)
            client.hset(key, 'choices', str(new_choices))
            client.hset(key, 'clocks', jsonify_vcl(new_vcl))

    # Return the new rating for the entity
    return {"rating": finalrating}
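# A worked example of the weave above, using plain dicts as a minimal
# stand-in for VectorClock (missing entries count as 0). This is simplified:
# it models only the "dominates" and "concurrent" paths, not the stale-clock
# early exit.
def dominates(a, b):
    keys = set(a) | set(b)
    return all(a.get(k, 0) >= b.get(k, 0) for k in keys) and a != b

stored = [({"a": 1}, 4.0)]   # (clock, choice) pairs already in the DB
incoming = ({"b": 1}, 5.0)   # concurrent with {"a": 1}: neither dominates

clock, rating = incoming
kept = [(c, r) for c, r in stored if not dominates(clock, c)]
kept.append((clock, rating))
mean = sum(r for _, r in kept) / len(kept)
print(kept, mean)  # [({'a': 1}, 4.0), ({'b': 1}, 5.0)] 4.5

# A later PUT of 3.0 with clock {"a": 1, "b": 2} dominates both stored
# clocks, so the list collapses back to a single choice and rating 3.0.
clock, rating = ({"a": 1, "b": 2}, 3.0)
kept = [(c, r) for c, r in kept if not dominates(clock, c)]
kept.append((clock, rating))
print(kept)  # [({'a': 1, 'b': 2}, 3.0)]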