def dispatch(self, echoMessage):
    """Dispatch a message to an EchoProcess worker based on the load-balancing scheme."""
    if Profiler.enabled:
        Profiler.send_data("transmission_of_msg_type_%i" % echoMessage.get_type(),
                           time.time() - echoMessage.t0)

    # re-measure t0 to capture queuing time
    echoMessage.t0 = time.time()

    # TODO: add proper load balancing. For now, use id % Nprocess.
    invoke_id = echoMessage.get_id()

    # send to a queue
    self.process_queue_list[invoke_id % self.Nprocess].put(echoMessage)

    #print "{}".format(self.process_queue_list[0].qsize())

    # Correct load balancing (disabled):
    # TODO: needs a capped dict to reduce CPU usage, or a new LB idea.
    #
    # try:
    #     # send to the same queue when this invoke id has been seen before
    #     self.process_queue_list[self.multiplex_table[invoke_id]].put(echoMessage)
    # except KeyError:
    #     # send to a queue chosen by load balancing
    #     # TODO: what if all queues are empty all the time?
    #     min_queue = min(self.process_queue_list, key=lambda x: x.qsize())
    #     min_queue.put(echoMessage)
    #     self.multiplex_table[invoke_id] = self.process_queue_list.index(min_queue)
    #
    # #print "{} {} {}".format(self.process_queue_list[0].qsize(), self.process_queue_list[1].qsize(), self.process_queue_list[2].qsize())
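# A minimal, self-contained sketch of the id % Nprocess scheme used above
# (illustrative only; the names here are hypothetical, not part of this module):
#
#     import multiprocessing
#
#     Nprocess = 4
#     queues = [multiprocessing.Queue() for _ in range(Nprocess)]
#
#     def route(invoke_id, message):
#         # messages with the same invoke id always land on the same worker,
#         # which preserves per-id ordering but can skew load if ids cluster
#         queues[invoke_id % Nprocess].put(message)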
def run(self):
    """This method is called when the process starts. Its responsibility is to
    get a message from the queue and ask the EchoProtocol class to perform the task."""
    # instantiate EchoProtocol
    echo_protocol = EchoProtocol()
    input_queue = self.input_queue

    while True:
        # get a message out of the queue
        echo_message = input_queue.get()

        # collect queue time
        if Profiler.enabled:
            Profiler.send_data('Queue_time', time.time() - echo_message.t0)

        # perform the task (blocking call)
        echo_protocol.execute(echo_message)
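# Sketch of how a pool of these workers might be wired up to the dispatcher
# (hypothetical wiring; the EchoProcess constructor signature is assumed,
# not taken from this repo):
#
#     from multiprocessing import Queue
#
#     Nprocess = 4
#     process_queue_list = [Queue() for _ in range(Nprocess)]
#     for q in process_queue_list:
#         worker = EchoProcess(input_queue=q)  # assumed constructor signature
#         worker.start()                       # eventually invokes run() above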
def _rank_aggregate(self, query):
    """(QueryObject) -> list of JSON objects

    Calculate the total rank score for each object associated with the query.
    """
    ##############
    ## matching ##
    ##############
    collection = self.mongo_connection["index"]["termdoc"]

    with Profiler("index_DB_access_for_approx_match"):
        results_cursor = list(collection.find(query.match_statement))

    results = self._groupbyDoc(query, results_cursor)
    return results
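# For orientation: execute() below reads 'document' and 'total_score' keys
# from the result, so _groupbyDoc (not shown here) presumably sums a per-term
# score per document. A sketch of that grouping, assuming a hypothetical
# termdoc entry shape {'term': ..., 'document': <oid>, 'score': s}:
#
#     def _groupbyDoc_sketch(rows):
#         totals = {}
#         for row in rows:
#             totals[row['document']] = totals.get(row['document'], 0) + row['score']
#         return [{'document': d, 'total_score': s} for d, s in totals.items()]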
# TODO: not in use
# class OutgoingDispatcher(threading.Thread):
#     """It receives outgoing messages from workers (EchoProcesses) and transmits them to destinations."""
#
#     def __init__(self, neighbor_list):
#         # base class initialization
#         super(OutgoingDispatcher, self).__init__()
#
#         self.context = zmq.Context()
#         self.receiver = self.context.socket(zmq.PULL)
#         self.receiver.bind("ipc:///tmp/outgoing")  # ipc: local inter-process communication
#
#         # set up queues and outgoing socket threads
#         self.setup_outgoing_queue_and_socket(neighbor_list)
#
#         logging.info("Outgoing dispatcher initiated successfully.")
#
#     def setup_outgoing_queue_and_socket(self, neighbor_list):
#         """Initialize outgoing queues and sockets."""
#         # one queue + outgoing socket thread per neighbor
#         self.outgoing_queues = {}
#         for neighbor in neighbor_list:
#             self.outgoing_queues[neighbor] = Queue.Queue()
#             threading.Thread(target=self.outgoing_socket_thread,
#                              args=(self.outgoing_queues[neighbor], neighbor, DISPATCHER_PORT)).start()
#
#         # add a special outgoing socket thread for sending a return message
#         self.outgoing_queues['localhost'] = Queue.Queue()
#         threading.Thread(target=self.outgoing_socket_thread,
#                          args=(self.outgoing_queues['localhost'], 'localhost', INTERFACE_PORT)).start()
#
#     def outgoing_socket_thread(self, queue, neighbor_address, port):
#         """A thread that takes a message from the queue and sends it to its destination."""
#         sender = self.context.socket(zmq.PUSH)
#         sender.connect("tcp://{0}:{1}".format(neighbor_address, port))
#
#         while True:
#             sender.send(queue.get())
#
#     # override method run() in Thread
#     def run(self):
#         # bind method references locally
#         dispatch = self.dispatch
#         receiver = self.receiver
#
#         while True:
#             # receive a message tuple from an EchoProcess
#             message_tuple = receiver.recv_multipart()
#             dispatch(message_tuple)
#
#     def dispatch(self, message_tuple):
#         """Dispatch a message from an EchoProcess worker and place it in the outgoing queue."""
#         message_content = message_tuple[0]
#         destination_list = message_tuple[1:]
#
#         for destination_name in destination_list:
#             try:
#                 self.outgoing_queues[destination_name].put(message_content)
#             except KeyError:
#                 logging.error("Can't find destination {0} for sending a message".format(destination_name))
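# The disabled class above relies on ZeroMQ's PUSH/PULL pipeline pattern.
# A minimal standalone sketch of that pattern (standard pyzmq calls; the
# endpoint name is a toy placeholder):
#
#     import zmq
#
#     ctx = zmq.Context()
#     pull = ctx.socket(zmq.PULL)
#     pull.bind("ipc:///tmp/demo")      # consumer binds
#
#     push = ctx.socket(zmq.PUSH)
#     push.connect("ipc:///tmp/demo")   # producer connects
#
#     push.send(b"hello")
#     print(pull.recv())                # -> "hello"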
def execute(self, query):
    """(QueryObject) -> list of JSON objects

    Execute the local aggregator function
    (similar to A.local() in the echo context).
    """
    #########################
    # matching + projection #
    #########################
    # assign local variables
    object_collection = self.mongo_connection["sensor"]["objects"]
    limit = query.parameters['limit']

    # also filter out the 'content' and '_id' fields
    if not query.projection_statement or len(query.projection_statement) == 0:
        query.projection_statement = {"content": 0}  # ,"_id":0}
    # TODO: should have else: filter "_id":0

    if query.parameters['isApprox']:
        # approximate match

        # get index objects
        results = self._rank_aggregate(query)
        sorted_results = sorted(results, key=operator.itemgetter('total_score'), reverse=True)
        sorted_results = sorted_results[:limit]

        # get the list of real-object oids from the index
        oid_list = map(operator.itemgetter('document'), sorted_results)

        with Profiler("object_DB_access_for_approx_match"):
            # get the actual objects
            if oid_list:
                real_object_results = list(object_collection.find({"_id": {'$in': oid_list}},
                                                                  query.projection_statement))
            else:
                return []

        # add the 'total_score' attribute to each real object
        temp = {}
        for obj in real_object_results:
            temp[obj['_id']] = obj

        for obj_index in sorted_results:
            try:
                temp[obj_index['document']]['total_score'] = obj_index['total_score']
            except KeyError:
                logging.warning("mismatch between index object and real object: " + str(obj_index['document']))

        results = temp.itervalues()
        sorted_results = sorted(results, key=operator.itemgetter('total_score'), reverse=True)

    else:
        # exact match
        with Profiler("object_DB_access_for_exact_match"):
            results_cursor = object_collection.find(query.match_statement,
                                                    query.projection_statement,
                                                    limit=limit if limit else 0)
        sorted_results = list(results_cursor)

    ########################
    # Aggregation function #
    ########################
    ## unique
    if (query.aggregation_function_list and len(query.aggregation_function_list) == 1
            and 'unique' in query.aggregation_function_list[0]):
        temp = {}
        func_name, attr_name = query.aggregation_function_list[0]

        # generator over the values of the attribute, for objects that have it
        generator_values = (obj[attr_name] for obj in sorted_results if attr_name in obj)
        for attr_value in generator_values:
            temp[attr_value] = 1

        sorted_results = [{attr_name: list(temp.iterkeys())}]
        return sorted_results

    temp_results = []

    ## group by
    if query.group_attribute_list:
        # filter out objects that lack any of the group attributes
        sorted_results = filter(lambda x: all(k in x for k in query.group_attribute_list),
                                sorted_results)

        # sort on the attributes in group_attribute_list (groupby needs sorted input)
        sorted_results = sorted(sorted_results, key=operator.itemgetter(*query.group_attribute_list))

        # perform the group by
        for k, g in itertools.groupby(sorted_results, key=operator.itemgetter(*query.group_attribute_list)):
            # perform the aggregation on group g
            obj = self._aggr_perform(query.aggregation_function_list, g)

            # add the group key(s) to the result object
            for i in range(len(query.group_attribute_list)):
                obj[query.group_attribute_list[i]] = k[i] if isinstance(k, tuple) else k

            temp_results.append(obj)

        sorted_results = temp_results

    ## normal aggregation case
    elif query.aggregation_function_list:
        sorted_results = [self._aggr_perform(query.aggregation_function_list, sorted_results)]

    ## no aggregation to perform
    else:
        pass

    return sorted_results
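# Hedged usage sketch. QueryObject construction is hypothetical; only the
# attributes actually read by execute() above are filled in, and the result
# shape depends on _aggr_perform (not shown here):
#
#     query = QueryObject()
#     query.match_statement = {'type': 'temperature'}
#     query.projection_statement = None              # defaults to {'content': 0}
#     query.parameters = {'limit': 10, 'isApprox': False}
#     query.aggregation_function_list = [('avg', 'value')]   # (func_name, attr_name) pairs
#     query.group_attribute_list = ['room']
#
#     results = aggregator.execute(query)  # -> one aggregated object per 'room' group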