def interpret(self, raw_query):
    """ (str,) -> Query_object

    Interpret the raw query to a query object so that the system can
    handle it.

    Returns None when the raw query cannot be parsed, or when the
    parser produced no recognized execution flag.
    """
    # Parse the raw query into its component sub-queries and flags.
    match_query, project_query, aggr_query, \
        group_query, pass_one_query, link_attributes, \
        pass_two_query, flags = nsInterface(raw_query, self.args.userExact)

    if not match_query:
        # BUG FIX: the original only warned and then fell through,
        # which could build a QueryObject around an empty match query.
        logging.warning("unable to parse %s" % raw_query)
        return None

    # Common execution parameters; only the 'approximate' mode differs.
    exact_params = {'isRank': False, "isApprox": False,
                    "limit": self.args.limit}

    if 'projection' in flags:
        # Identifying attributes are always returned alongside the
        # user-requested projection fields.
        project_query.update({'object-name': 1, 'object-type': 1})
        return QueryObject(match_query, project_query,
                           parameters=exact_params)
    elif 'aggregation' in flags:
        return QueryObject(match_query, project_query, aggr_query,
                           group_query, parameters=exact_params)
    elif 'approximate' in flags:
        return QueryObject(match_query,
                           parameters={'isRank': True, "isApprox": True,
                                       "limit": self.args.limit})
    elif 'exact' in flags:
        return QueryObject(match_query, parameters=exact_params)

    # BUG FIX: make the "no recognized flag" failure explicit instead of
    # silently returning None via fall-through.
    logging.warning("no recognized flag for query %s" % raw_query)
    return None
def interpret(self, raw_query):
    """ (str,) -> Query_object

    Turn a raw textual query into a QueryObject that the rest of the
    system can execute.
    """
    # Delegate parsing to the natural-language interface.
    (match_query, project_query, aggr_query, group_query,
     pass_one_query, link_attributes, pass_two_query,
     flags) = nsInterface(raw_query, self.args.userExact)

    if not match_query:
        logging.warning("unable to parse %s" % raw_query)

    # Parameter dictionaries for the two execution modes; only the
    # 'approximate' branch ranks and approximates.
    default_params = {'isRank': False, "isApprox": False,
                      "limit": self.args.limit}
    approx_params = {'isRank': True, "isApprox": True,
                     "limit": self.args.limit}

    if 'projection' in flags:
        # Identifying attributes always accompany a projection.
        project_query.update({'object-name': 1, 'object-type': 1})
        return QueryObject(match_query, project_query,
                           parameters=default_params)
    if 'aggregation' in flags:
        return QueryObject(match_query, project_query, aggr_query,
                           group_query, parameters=default_params)
    if 'approximate' in flags:
        return QueryObject(match_query, parameters=approx_params)
    if 'exact' in flags:
        return QueryObject(match_query, parameters=default_params)
    # No recognized flag: fall through and return None implicitly,
    # exactly as the exhausted elif-chain did.
def main(): keywords = set() #################### # getting keywords # #################### for server_name in server_names: connection = pymongo.MongoClient(server_name, 27017) collection = connection["sensor"]["objects"] objects_cursor = collection.find({}, { '_id': 0, 'content': 0, 'last-updated': 0 }) for obj in objects_cursor: for keyword in _flatten_to_list(obj): if isinstance(keyword,unicode) and len(keyword)<200 and\ not ' ' in keyword and not '/' in keyword and\ not '@' in keyword and not 'mac-address' in keyword and\ not '[' in keyword and not 'image-id' in keyword and\ not 'command-line' in keyword and not 'memory-util' in keyword and\ not 'count' in keyword and not 'link_' in keyword and\ not 'group' in keyword and not 'sum' in keyword and\ not 'max' in keyword and not 'min' in keyword and\ not 'project' in keyword and not '_fake' in keyword: #term.count(':')<=1 and term.count('-') <=2 and \ keywords.add(keyword) print "keywords length of %s = %d" % (server_name, len(keywords)) ####################################### # check the number of returned result # ####################################### qid = 999999 tailered_keywords = set() context = zmq.Context() receiver = context.socket(zmq.PULL) receiver.bind("tcp://*:{}".format(INTERFACE_PORT)) sender = context.socket(zmq.PUSH) sender.connect("tcp://localhost:{}".format(DISPATCHER_PORT)) for keyword in keywords: # print keyword # create query object match_query, project_query, aggr_query, group_query, pass_one_query, link_attributes, pass_two_query, flags = nsInterface( "count(object-name) " + keyword, True) if not match_query: print "unable to parse %s" % keyword continue if 'aggregation' in flags: query = QueryObject(match_query, project_query, aggr_query, group_query, parameters={ 'isRank': False, "isApprox": False }) else: print 'parsing query error' # create echo msg and send msg = ECHOMessage(ECHOMessage.MSG_TYPE_INVOKE, qid, "localhost", query) sender.send(msg.serialize()) qid += 1 # receive 
results results = receiver.recv() msg = ECHOMessage.deserialize(results) n_object_match = msg.get_data()[0]['object-name-count'] if n_object_match > MIN_DF_THRESHOLD and n_object_match < DF_THRESHOLD: tailered_keywords.add(keyword) # print len(tailered_keywords) print "tailered keywords length = " + str(len(tailered_keywords)) ################### # add combination # ################### temp_pool = set() for i in xrange(MIN_COMBINATION, MAX_COMBINATION): for j in xrange(0, 400): temp_pool.add(' | '.join(random.sample(tailered_keywords, i))) print "total keywords length = " + str( len(tailered_keywords) + len(temp_pool)) ################### # write to a file # ################### print 'writing...' with open(FILE, 'w') as f: if MIN_COMBINATION == 1: f.write('\n'.join(tailered_keywords)) f.write('\n'.join(temp_pool)) print 'DONE'
def search(self): self.dbQuery = "" print "\n\n\n" print self.queryExpression print "\n\n\n" t0 = time.time() # parse user query match_query, project_query, aggr_query, group_query, pass_one_query, link_attributes, pass_two_query, flags = nsInterface( self.queryExpression, not self.isApprox) print flags, match_query print '\n\n\n' if 'link' in flags and flags['link']: self.link(pass_one_query, link_attributes, pass_two_query, flags) elif 'projection' in flags: self.projection(match_query, project_query) elif 'aggregation' in flags: self.aggregation(match_query, project_query, aggr_query, group_query) else: # normal execution self.basic(match_query, flags) self.clean_up_result() self.numResults = len(self.results) self.time = time.time() - t0
def main(): keywords = set() #################### # getting keywords # #################### for server_name in server_names: connection = pymongo.MongoClient(server_name,27017) collection = connection["sensor"]["objects"] objects_cursor = collection.find({},{'_id':0,'content':0,'last-updated':0}) for obj in objects_cursor: for keyword in _flatten_to_list(obj): if isinstance(keyword,unicode) and len(keyword)<200 and\ not ' ' in keyword and not '/' in keyword and\ not '@' in keyword and not 'mac-address' in keyword and\ not '[' in keyword and not 'image-id' in keyword and\ not 'command-line' in keyword and not 'memory-util' in keyword and\ not 'count' in keyword and not 'link_' in keyword and\ not 'group' in keyword and not 'sum' in keyword and\ not 'max' in keyword and not 'min' in keyword and\ not 'project' in keyword and not '_fake' in keyword: #term.count(':')<=1 and term.count('-') <=2 and \ keywords.add(keyword) print "keywords length of %s = %d" %(server_name,len(keywords)) ####################################### # check the number of returned result # ####################################### qid = 999999 tailered_keywords = set() context = zmq.Context() receiver = context.socket(zmq.PULL) receiver.bind("tcp://*:{}".format(INTERFACE_PORT)) sender = context.socket(zmq.PUSH) sender.connect ("tcp://localhost:{}".format(DISPATCHER_PORT)) for keyword in keywords: # print keyword # create query object match_query,project_query,aggr_query,group_query,pass_one_query,link_attributes,pass_two_query,flags = nsInterface("count(object-name) "+keyword,True) if not match_query: print "unable to parse %s" % keyword continue if 'aggregation' in flags: query = QueryObject(match_query,project_query,aggr_query,group_query,parameters={'isRank':False,"isApprox":False}) else: print 'parsing query error' # create echo msg and send msg = ECHOMessage(ECHOMessage.MSG_TYPE_INVOKE,qid,"localhost",query) sender.send(msg.serialize()) qid += 1 # receive results results = receiver.recv() msg = 
ECHOMessage.deserialize(results) n_object_match = msg.get_data()[0]['object-name-count'] if n_object_match > MIN_DF_THRESHOLD and n_object_match < DF_THRESHOLD: tailered_keywords.add(keyword) # print len(tailered_keywords) print "tailered keywords length = "+str(len(tailered_keywords)) ################### # add combination # ################### temp_pool = set() for i in xrange(MIN_COMBINATION,MAX_COMBINATION): for j in xrange(0,400): temp_pool.add(' | '.join(random.sample(tailered_keywords,i))) print "total keywords length = "+str(len(tailered_keywords)+len(temp_pool)) ################### # write to a file # ################### print 'writing...' with open(FILE,'w') as f: if MIN_COMBINATION == 1: f.write('\n'.join(tailered_keywords)) f.write('\n'.join(temp_pool)) print 'DONE'
def search(self): self.dbQuery = "" print "\n\n\n" print self.queryExpression print "\n\n\n" t0 = time.time() # parse user query match_query,project_query,aggr_query,group_query,pass_one_query,link_attributes,pass_two_query,flags = nsInterface(self.queryExpression, not self.isApprox) print flags,match_query print '\n\n\n' if 'link' in flags and flags['link']: self.link(pass_one_query,link_attributes,pass_two_query,flags) elif 'projection' in flags: self.projection(match_query,project_query) elif 'aggregation' in flags: self.aggregation(match_query, project_query, aggr_query, group_query) else: # normal execution self.basic(match_query,flags) self.clean_up_result() self.numResults = len(self.results) self.time = time.time()-t0