Esempio n. 1
0
    def interpret(self, raw_query):
        """ (str,) -> Query_object
        Parse ``raw_query`` with ``nsInterface`` and wrap the parse result in
        a ``QueryObject`` the rest of the system can handle.

        The kind of object built depends on the first flag found, checked in
        this order: 'projection', 'aggregation', 'approximate', 'exact'.
        Every object carries the ``isRank``/``isApprox`` switches (both True
        only for 'approximate') and ``self.args.limit`` as its limit.

        A falsy match query is only reported through ``logging.warning``;
        the flag handling below still runs. When none of the four flags is
        present the method returns None.
        """

        # Parse the query; the link-related parts are not used here.
        (match_query, project_query, aggr_query, group_query,
         _pass_one_query, _link_attributes, _pass_two_query,
         flags) = nsInterface(raw_query, self.args.userExact)

        if not match_query:
            logging.warning("unable to parse %s" % raw_query)

        def _parameters(approximate):
            # Ranking and approximation are always switched on/off together.
            return {
                'isRank': approximate,
                "isApprox": approximate,
                "limit": self.args.limit,
            }

        if 'projection' in flags:
            # The object name and type are always projected as well.
            project_query.update({'object-name': 1, 'object-type': 1})
            return QueryObject(match_query, project_query,
                               parameters=_parameters(False))

        if 'aggregation' in flags:
            return QueryObject(match_query, project_query,
                               aggr_query, group_query,
                               parameters=_parameters(False))

        if 'approximate' in flags:
            return QueryObject(match_query, parameters=_parameters(True))

        if 'exact' in flags:
            return QueryObject(match_query, parameters=_parameters(False))

        # No recognised flag: nothing to build.
        return None
Esempio n. 2
0
    def interpret(self, raw_query):
        """ (str,) -> Query_object
        Convert the raw query string into a ``QueryObject``.

        ``nsInterface`` does the parsing. The flags it reports select which
        parsed parts are handed to ``QueryObject``; they are tested in the
        order 'projection', 'aggregation', 'approximate', 'exact' and the
        first hit wins. ``isRank`` and ``isApprox`` are True only for the
        'approximate' case, and ``limit`` is taken from ``self.args.limit``.

        An empty match query is logged as a warning but does not stop the
        method. If no flag matches, None is returned.
        """

        # Parse the query (the pass-one/link/pass-two parts are unused here)
        (match_query, project_query, aggr_query, group_query,
         _unused_pass_one, _unused_link_attrs, _unused_pass_two,
         flags) = nsInterface(raw_query, self.args.userExact)

        if not match_query:
            logging.warning("unable to parse %s" % raw_query)

        # Pick the positional arguments for QueryObject from the flags.
        approximate = False
        if 'projection' in flags:
            # Always include the object name and type in the projection.
            project_query.update({'object-name': 1, 'object-type': 1})
            positional = (match_query, project_query)
        elif 'aggregation' in flags:
            positional = (match_query, project_query,
                          aggr_query, group_query)
        elif 'approximate' in flags:
            positional = (match_query,)
            approximate = True
        elif 'exact' in flags:
            positional = (match_query,)
        else:
            return None

        return QueryObject(
            *positional,
            parameters={'isRank': approximate, "isApprox": approximate,
                        "limit": self.args.limit}
        )
Esempio n. 3
0
def main():
    """Build a keyword workload file from the sensor object collections.

    The function runs four stages:

    1. Scan ``sensor.objects`` on every host in ``server_names`` and collect
       the unicode values shorter than 200 characters that contain none of
       the excluded fragments.
    2. For each candidate, send a ``count(object-name) <keyword>`` query to
       the dispatcher and keep the keyword when the reported count lies
       strictly between ``MIN_DF_THRESHOLD`` and ``DF_THRESHOLD``.
    3. Draw random ``' | '``-joined combinations of the kept keywords, 400
       draws for each size in ``MIN_COMBINATION .. MAX_COMBINATION - 1``.
    4. Write the entries to ``FILE``, one per line, encoded as UTF-8.
    """
    # Local import: only the file write at the end needs it.
    import io

    # A value containing any of these fragments is not used as a keyword.
    excluded_fragments = (
        ' ', '/', '@', 'mac-address', '[', 'image-id', 'command-line',
        'memory-util', 'count', 'link_', 'group', 'sum', 'max', 'min',
        'project', '_fake',
    )

    keywords = set()

    ####################
    # getting keywords #
    ####################
    for server_name in server_names:
        connection = pymongo.MongoClient(server_name, 27017)
        try:
            collection = connection["sensor"]["objects"]
            objects_cursor = collection.find({}, {
                '_id': 0,
                'content': 0,
                'last-updated': 0
            })
            for obj in objects_cursor:
                for keyword in _flatten_to_list(obj):
                    if not isinstance(keyword, unicode) or len(keyword) >= 200:
                        continue
                    if any(fragment in keyword
                           for fragment in excluded_fragments):
                        continue
                    keywords.add(keyword)
        finally:
            # Release the connection even when the scan fails part-way.
            connection.close()
        print("keywords length of %s = %d" % (server_name, len(keywords)))

    #######################################
    # check the number of returned result #
    #######################################
    qid = 999999
    tailored_keywords = set()

    context = zmq.Context()
    try:
        receiver = context.socket(zmq.PULL)
        receiver.bind("tcp://*:{}".format(INTERFACE_PORT))
        sender = context.socket(zmq.PUSH)
        sender.connect("tcp://localhost:{}".format(DISPATCHER_PORT))

        for keyword in keywords:
            # create query object (the link-related parts are unused here)
            (match_query, project_query, aggr_query, group_query,
             _pass_one_query, _link_attributes, _pass_two_query,
             flags) = nsInterface("count(object-name) " + keyword, True)

            if not match_query:
                print("unable to parse %s" % keyword)
                continue

            if 'aggregation' not in flags:
                # Skip this keyword: without a fresh query object the code
                # below would send an undefined or stale query.
                print('parsing query error')
                continue

            query = QueryObject(match_query,
                                project_query,
                                aggr_query,
                                group_query,
                                parameters={
                                    'isRank': False,
                                    "isApprox": False
                                })

            # create echo msg and send
            msg = ECHOMessage(ECHOMessage.MSG_TYPE_INVOKE, qid, "localhost",
                              query)
            sender.send(msg.serialize())
            qid += 1

            # receive results
            results = receiver.recv()
            msg = ECHOMessage.deserialize(results)
            n_object_match = msg.get_data()[0]['object-name-count']

            if MIN_DF_THRESHOLD < n_object_match < DF_THRESHOLD:
                tailored_keywords.add(keyword)
    finally:
        # Close both sockets and terminate the context without lingering.
        context.destroy(linger=0)

    print("tailered keywords length = " + str(len(tailored_keywords)))

    ###################
    # add combination #
    ###################
    # Materialise the set once so every draw samples from a sequence.
    candidates = list(tailored_keywords)
    temp_pool = set()
    for size in xrange(MIN_COMBINATION, MAX_COMBINATION):
        if size > len(candidates):
            # random.sample would raise ValueError; larger sizes fail too.
            print("not enough keywords for combinations of size %d" % size)
            break
        for _ in xrange(0, 400):
            temp_pool.add(' | '.join(random.sample(candidates, size)))

    print("total keywords length = " + str(
        len(tailored_keywords) + len(temp_pool)))

    ###################
    # write to a file #
    ###################
    print('writing...')
    lines = []
    if MIN_COMBINATION == 1:
        lines.extend(tailored_keywords)
    lines.extend(temp_pool)
    # A single join keeps the last keyword and the first combination on
    # separate lines; UTF-8 keeps non-ASCII keywords writable.
    with io.open(FILE, 'w', encoding='utf-8') as f:
        f.write(u'\n'.join(lines))
    print('DONE')
    def search(self):
        """Execute ``self.queryExpression`` and record the outcome.

        The expression is parsed by ``nsInterface`` (second argument:
        ``not self.isApprox``) and handed to exactly one handler:

        * ``self.link`` when flags has a truthy 'link' entry,
        * ``self.projection`` when 'projection' is in flags,
        * ``self.aggregation`` when 'aggregation' is in flags,
        * ``self.basic`` otherwise.

        Afterwards ``self.clean_up_result()`` is called, ``self.numResults``
        is set to ``len(self.results)`` and ``self.time`` to the elapsed
        wall-clock seconds, parsing included.
        """
        self.dbQuery = ""

        # Echo the expression, padded with blank lines.
        print("\n\n\n")
        print(self.queryExpression)
        print("\n\n\n")

        started_at = time.time()

        # parse user query
        parsed = nsInterface(self.queryExpression, not self.isApprox)
        (match_query, project_query, aggr_query, group_query,
         pass_one_query, link_attributes, pass_two_query, flags) = parsed
        print("%s %s" % (flags, match_query))
        print('\n\n\n')

        # Dispatch on the flags reported by the parser; first match wins.
        is_link_query = 'link' in flags and flags['link']
        if is_link_query:
            self.link(pass_one_query, link_attributes, pass_two_query, flags)
        elif 'projection' in flags:
            self.projection(match_query, project_query)
        elif 'aggregation' in flags:
            self.aggregation(match_query, project_query,
                             aggr_query, group_query)
        else:
            # normal execution
            self.basic(match_query, flags)

        self.clean_up_result()

        self.numResults = len(self.results)
        self.time = time.time() - started_at
Esempio n. 5
0
def main():
    """Build a keyword workload file from the sensor object collections.

    Stages, in order:

    1. Read ``sensor.objects`` from every host in ``server_names`` and
       gather the unicode values shorter than 200 characters that contain
       none of the excluded fragments.
    2. Send a ``count(object-name) <keyword>`` query per candidate to the
       dispatcher; keep the keyword when the returned count is strictly
       between ``MIN_DF_THRESHOLD`` and ``DF_THRESHOLD``.
    3. Create random ``' | '``-joined combinations of the kept keywords:
       400 draws per size, sizes ``MIN_COMBINATION .. MAX_COMBINATION - 1``.
    4. Write everything to ``FILE`` as UTF-8, one entry per line.
    """
    # Local import: only needed for the encoded file write at the end.
    import io

    # Fragments that disqualify a value from becoming a keyword.
    excluded_fragments = (
        ' ', '/', '@', 'mac-address', '[', 'image-id', 'command-line',
        'memory-util', 'count', 'link_', 'group', 'sum', 'max', 'min',
        'project', '_fake',
    )

    def _is_candidate(value):
        # Short unicode value containing none of the excluded fragments.
        if not isinstance(value, unicode) or len(value) >= 200:
            return False
        return not any(fragment in value for fragment in excluded_fragments)

    keywords = set()

    ####################
    # getting keywords #
    ####################
    for server_name in server_names:
        connection = pymongo.MongoClient(server_name, 27017)
        try:
            collection = connection["sensor"]["objects"]
            objects_cursor = collection.find(
                {}, {'_id': 0, 'content': 0, 'last-updated': 0})
            for obj in objects_cursor:
                keywords.update(
                    value for value in _flatten_to_list(obj)
                    if _is_candidate(value))
        finally:
            # Always give the connection back, even if the scan raised.
            connection.close()
        print("keywords length of %s = %d" % (server_name, len(keywords)))

    #######################################
    # check the number of returned result #
    #######################################
    qid = 999999
    tailored_keywords = set()

    context = zmq.Context()
    try:
        receiver = context.socket(zmq.PULL)
        receiver.bind("tcp://*:{}".format(INTERFACE_PORT))
        sender = context.socket(zmq.PUSH)
        sender.connect("tcp://localhost:{}".format(DISPATCHER_PORT))

        for keyword in keywords:
            # create query object (the link-related parts are unused here)
            (match_query, project_query, aggr_query, group_query,
             _pass_one_query, _link_attributes, _pass_two_query,
             flags) = nsInterface("count(object-name) " + keyword, True)

            if not match_query:
                print("unable to parse %s" % keyword)
                continue

            if 'aggregation' not in flags:
                # No query object can be built for this keyword; sending
                # anyway would use an undefined or stale query.
                print('parsing query error')
                continue

            query = QueryObject(match_query, project_query,
                                aggr_query, group_query,
                                parameters={'isRank': False,
                                            "isApprox": False})

            # create echo msg and send
            msg = ECHOMessage(ECHOMessage.MSG_TYPE_INVOKE, qid, "localhost",
                              query)
            sender.send(msg.serialize())
            qid += 1

            # receive results
            results = receiver.recv()
            msg = ECHOMessage.deserialize(results)
            n_object_match = msg.get_data()[0]['object-name-count']

            if MIN_DF_THRESHOLD < n_object_match < DF_THRESHOLD:
                tailored_keywords.add(keyword)
    finally:
        # Close both sockets and terminate the context without lingering.
        context.destroy(linger=0)

    print("tailered keywords length = " + str(len(tailored_keywords)))

    ###################
    # add combination #
    ###################
    # Sample from a list built once rather than from the set on each draw.
    candidates = list(tailored_keywords)
    temp_pool = set()
    for size in xrange(MIN_COMBINATION, MAX_COMBINATION):
        if size > len(candidates):
            # random.sample would raise ValueError; larger sizes fail too.
            print("not enough keywords for combinations of size %d" % size)
            break
        for _ in xrange(0, 400):
            temp_pool.add(' | '.join(random.sample(candidates, size)))

    print("total keywords length = " + str(
        len(tailored_keywords) + len(temp_pool)))

    ###################
    # write to a file #
    ###################
    print('writing...')
    lines = []
    if MIN_COMBINATION == 1:
        lines.extend(tailored_keywords)
    lines.extend(temp_pool)
    # One join for all entries so the two groups are newline-separated;
    # UTF-8 keeps non-ASCII keywords writable.
    with io.open(FILE, 'w', encoding='utf-8') as f:
        f.write(u'\n'.join(lines))
    print('DONE')
 def search(self):
     """Parse and run ``self.queryExpression``, then store result metadata.

     ``nsInterface`` is called with the expression and ``not self.isApprox``.
     The flags it returns choose the handler, first match wins: a truthy
     'link' entry -> ``self.link``; 'projection' -> ``self.projection``;
     'aggregation' -> ``self.aggregation``; anything else -> ``self.basic``.

     After the handler, ``self.clean_up_result()`` runs, ``self.numResults``
     becomes ``len(self.results)`` and ``self.time`` the seconds elapsed
     since just before parsing.
     """
     self.dbQuery = ""

     # Show the expression between blank-line padding.
     blank_block = "\n\n\n"
     print(blank_block)
     print(self.queryExpression)
     print(blank_block)

     start = time.time()

     # parse user query
     (match_query, project_query, aggr_query, group_query,
      pass_one_query, link_attributes, pass_two_query,
      flags) = nsInterface(self.queryExpression, not self.isApprox)
     print("%s %s" % (flags, match_query))
     print('\n\n\n')

     # Choose the handler and its arguments, then make a single call.
     if 'link' in flags and flags['link']:
         handler = self.link
         handler_args = (pass_one_query, link_attributes,
                         pass_two_query, flags)
     elif 'projection' in flags:
         handler = self.projection
         handler_args = (match_query, project_query)
     elif 'aggregation' in flags:
         handler = self.aggregation
         handler_args = (match_query, project_query,
                         aggr_query, group_query)
     else:
         # normal execution
         handler = self.basic
         handler_args = (match_query, flags)
     handler(*handler_args)

     self.clean_up_result()

     self.numResults = len(self.results)
     self.time = time.time() - start