Example #1
0
 def _rank_aggregate(self, query):
     """(QueryObject) -> list of json object

     Look up the term/document index entries that satisfy
     ``query.match_statement`` and return whatever ``self._groupbyDoc``
     builds from them (the original author describes this as the total
     rank score of each object associated with the query).
     """
     # The term -> document index lives in the "termdoc" collection of the
     # "index" database.
     termdoc_index = self.mongo_connection["index"]["termdoc"]

     # Only the database round trip is profiled; the cursor is drained into a
     # list inside the profiled section so the fetch itself is measured.
     with Profiler("index_DB_access_for_approx_match"):
         matched_entries = list(termdoc_index.find(query.match_statement))

     # Per-document grouping is delegated to the helper.
     return self._groupbyDoc(query, matched_entries)
Example #2
0
    def execute(self, query):
        """ (QueryObject) -> list of json objects

        Execute the local aggregator function (similar to A.local() in echo
        context).

        The work is done in two stages:

        1. Matching + projection
           * approximate match (``query.parameters['isApprox']`` is truthy):
             index entries from ``self._rank_aggregate`` are ordered by
             'total_score' (highest first), cut to ``limit``, and the objects
             they point to are fetched from the "objects" collection of the
             "sensor" database. Each fetched object gets the 'total_score' of
             its index entry.
           * exact match: ``query.match_statement`` is run directly against
             the object collection.
           In both branches a falsy ``limit`` (None or 0) means "no limit".

        2. Aggregation
           * a single aggregation entry containing 'unique' returns
             ``[{attr_name: [distinct values]}]``;
           * if ``query.group_attribute_list`` is set, objects carrying every
             grouping attribute are grouped and each group is reduced with
             ``self._aggr_perform``; the grouping values are copied onto the
             reduced object;
           * otherwise, if aggregation functions are given, all objects are
             reduced into one result object;
           * otherwise the matched objects are returned unchanged.

        Side effect: when ``query.projection_statement`` is empty it is
        replaced on the query object by ``{"content": 0}``.
        """
        #########################
        # matching + projection #
        #########################
        object_collection = self.mongo_connection["sensor"]["objects"]
        limit = query.parameters['limit']

        # By default filter out the 'content' field.
        if not query.projection_statement:
            query.projection_statement = {"content": 0}  # ,"_id":0}
        # TODO: should have else: filter "_id":0
        # NOTE: the approximate branch below indexes objects by '_id', so a
        # projection that drops '_id' cannot be used with it as written.

        if query.parameters['isApprox']:
            # Approx. match: rank the index entries, best score first.
            ranked_entries = sorted(
                self._rank_aggregate(query),
                key=operator.itemgetter('total_score'),
                reverse=True,
            )
            # Slice only for a real limit: [:0] would drop every result,
            # whereas the exact branch treats a falsy limit as "no limit".
            if limit:
                ranked_entries = ranked_entries[:limit]

            # Ids of the real objects referenced by the index entries. Built
            # as a real list so the emptiness test and the '$in' operand are
            # valid regardless of what map() returns.
            oid_list = [entry['document'] for entry in ranked_entries]

            with Profiler("object_DB_access_for_approx_match"):
                if not oid_list:
                    return []
                # get actual objects
                real_objects = list(object_collection.find(
                    {"_id": {'$in': oid_list}},
                    query.projection_statement,
                ))

            # Copy 'total_score' from each index entry onto its real object.
            objects_by_id = {obj['_id']: obj for obj in real_objects}
            for entry in ranked_entries:
                real_object = objects_by_id.get(entry['document'])
                if real_object is None:
                    # The index points at an object that was not returned.
                    logging.warning(
                        "mismatch index object and real object : %s",
                        entry['document'],
                    )
                    continue
                real_object['total_score'] = entry['total_score']

            sorted_results = sorted(
                objects_by_id.values(),
                key=operator.itemgetter('total_score'),
                reverse=True,
            )
        else:
            # Exact match; pymongo interprets limit=0 as "no limit".
            with Profiler("object_DB_access_for_exact_match"):
                results_cursor = object_collection.find(
                    query.match_statement,
                    query.projection_statement,
                    limit=limit if limit else 0,
                )
                sorted_results = list(results_cursor)

        ########################
        # Aggregation function #
        ########################
        aggregations = query.aggregation_function_list

        ## unique
        # The membership test is kept as in the original: the single entry is
        # treated as "unique" when either of its two items equals 'unique'.
        if aggregations and len(aggregations) == 1 and 'unique' in aggregations[0]:
            _func_name, attr_name = aggregations[0]
            # A dict is used as the "seen" container; objects lacking the
            # attribute are skipped.
            distinct_values = {}
            for obj in sorted_results:
                if attr_name in obj:
                    distinct_values[obj[attr_name]] = 1
            return [{attr_name: list(distinct_values)}]

        group_attributes = query.group_attribute_list

        ## group by
        if group_attributes:
            group_key = operator.itemgetter(*group_attributes)
            # itemgetter yields a bare value for one attribute and a tuple
            # for several; decide on the attribute count, not on the key's
            # type, so a tuple-valued single attribute is kept intact.
            single_attribute = len(group_attributes) == 1

            # Drop objects that lack any grouping attribute, then sort by the
            # grouping key because groupby only merges adjacent items.
            candidates = [
                obj for obj in sorted_results
                if all(attr in obj for attr in group_attributes)
            ]
            candidates.sort(key=group_key)

            grouped_results = []
            for key, members in itertools.groupby(candidates, key=group_key):
                # Materialise the group: the groupby sub-iterator is one-shot,
                # and the non-grouped branch below also passes a list.
                aggregated = self._aggr_perform(aggregations, list(members))

                # Add the grouping values to the result object.
                key_values = (key,) if single_attribute else key
                for attr, value in zip(group_attributes, key_values):
                    aggregated[attr] = value

                grouped_results.append(aggregated)
            return grouped_results

        ## normal aggregation case
        if aggregations:
            return [self._aggr_perform(aggregations, sorted_results)]

        ## no aggregation requested
        return sorted_results