def getAllForBuilding(building_id):
    """Load every floor row that belongs to one building.

    Runs a ``select *`` on the FLOORS table filtered by BUILDING_ID and
    feeds each returned row to a fresh ``Floor`` through ``loadFromResult``.

    building_id -- spliced into the WHERE clause with ``+``, so it must
                   already be a string.

    Returns a list with the value of ``loadFromResult`` for every row.
    """
    # NOTE(review): building_id is concatenated straight into the SQL text;
    # confirm callers never pass unsanitised user input.
    sql = ("select * from " + Query.getDBName() +
           ".FLOORS where BUILDING_ID = '" + building_id + "'")
    return [Floor().loadFromResult(row) for row in Query.getAllResults(sql)]
def getAllFloors():
    """Load every row of the FLOORS table.

    Returns a list with the value of ``Floor.loadFromResult`` for each row
    returned by ``Query.getAllResults``.
    """
    sql = "select * from " + Query.getDBName() + ".FLOORS"
    return [Floor().loadFromResult(row) for row in Query.getAllResults(sql)]
def getAllCourses():
    """Load every row of the COURSES table.

    Returns a list with the value of ``Course.loadFromResult`` for each row
    returned by ``Query.getAllResults``.
    """
    sql = "select * from " + Query.getDBName() + ".COURSES"
    rows = Query.getAllResults(sql)
    return [Course().loadFromResult(row) for row in rows]
def getAllForUser(username):
    """Load every schedule that belongs to one user.

    The previous query compared USERNAME against the literal ``'******'``
    and ignored the ``username`` argument, so it could never select the
    requested user's rows. The argument is now spliced into the WHERE
    clause the same way the sibling ``getAllForBuilding`` helpers do.

    username -- user name to filter on; concatenated with ``+`` so it must
                be a string.

    Returns a list with the value of ``Schedule.loadFromResult`` per row.
    """
    # NOTE(review): username is concatenated into the SQL text, matching the
    # other helpers; switch to bound parameters if the Query API allows it.
    sql = ("select * from " + Query.getDBName() +
           ".SCHEDULES where USERNAME = '" + username + "'")
    schedules = []
    for result in Query.getAllResults(sql):
        schedule = Schedule()
        schedules.append(schedule.loadFromResult(result))
    return schedules
def create_chunk_comoress():
    """Handle a POST that splits a time range into chunks.

    Reads ``t1``, ``t2`` and ``channels`` from the JSON request body, builds
    a ``Query`` for that range, separates it into chunks and returns a JSON
    string holding the chunk count and a description of the first chunk.

    Aborts with HTTP 400 when the method is not POST, when ``t1 > t2``, or
    when the body is missing, malformed or lacks a required key. A missing
    key used to escape as an uncaught ``KeyError`` (HTTP 500).
    """
    if request.method != 'POST':
        abort(400)
    try:
        request_data = request.get_json()
        app.logger.debug(request_data)
        if request_data["t1"] > request_data["t2"]:
            app.logger.error("data")
            abort(400)
        query = Query(config.main["query"], request_data["t1"],
                      request_data["t2"], 150.0)
        chunk_list = query.separate(request_data["channels"])
        first = chunk_list[0]
        chunk_dict = {
            "len_dict": len(chunk_list),
            "first": {
                "t1": first.t1,
                "t2": first.t2,
                "channels": first.ch,
                "uuid": first.uuid,
            },
        }
        return json.dumps(chunk_dict)
    except (KeyError, TypeError, ValueError) as ex:
        # TypeError covers a body that is not a JSON object (e.g. None);
        # KeyError covers a body without t1 / t2 / channels.
        app.logger.error(str(ex))
        abort(400)
def generateQueries(numQueries, segmentCount, segmentGenerator, minSize,
                    maxSize, sizeGenerator):
    """Build ``numQueries`` queries, each covering a run of segment ids.

    A chosen segment and a size are drawn per query from the two
    generators. The run normally starts at the chosen segment; when it
    would extend past ``segmentCount`` it is shifted back by the overshoot.

    Returns the list of created ``Query`` objects.
    """
    chosen_segments = segmentGenerator.generateDistribution(
        1, segmentCount, numQueries)
    sizes = sizeGenerator.generateDistribution(minSize, maxSize, numQueries)

    querylist = []
    for i in xrange(numQueries):
        q = Query(QueryGenerator.queryRunningCount)
        QueryGenerator.queryRunningCount += 1

        chosen = chosen_segments[i]
        size = sizes[i]
        first = chosen
        if chosen + size - 1 > segmentCount:
            # Pull the start back by the number of segments that would
            # otherwise fall beyond segmentCount.
            first = chosen - (size - (segmentCount - chosen + 1))

        for segment in xrange(first, first + size):
            q.add(segment)
        querylist.append(q)
    return querylist
def generateQueries(startTime, endTime, numQueries, accessGenerator, periodGenerator, popularityList): querylist = list() elapsed = (endTime - startTime).total_seconds() accesslist = accessGenerator.generateDistribution(0, elapsed, numQueries, popularityList) periodlist = periodGenerator.generateDistribution(1, elapsed, numQueries, popularityList) histogram = {} for i in xrange(numQueries): q = Query(QueryGenerator.queryRunningCount, elapsed) QueryGenerator.queryRunningCount += 1 sttime = accesslist[i] #print "sttime: %s" % sttime #if (starttime + periodlist[i] - 1 > elapsed): # starttime = starttime - (periodlist[i] - (elapsed - starttime + 1) newstart = startTime + dt.timedelta(0, sttime) startstring = newstart.strftime('%Y-%m-%dT%H:%M:%S') #print(periodlist[i], Utils.iso8601(dt.timedelta(seconds=periodlist[i]))) q.setInterval(startstring + "/" + Utils.iso8601(dt.timedelta(seconds=periodlist[i]))) querylist.append(q) #print "interval: " + q.interval #print "index: " , q.index print "starttime: " , q.startTime for j in xrange(periodlist[i]): if (q.startTime+dt.timedelta(seconds=periodlist[i])) in historgram: historgram[q.startTime+dt.timedelta(seconds=periodlist[i])] = historgram[q.startTime+dt.timedelta(seconds=periodlist[i])]+1; else: historgram[q.startTime+dt.timedelta(seconds=periodlist[i])] = 1 print histogram return querylist
def read(self):
    """Parse a network description from ``fileinput`` into nodes and queries.

    The input is scanned line by line with all whitespace removed. A line
    containing "Nodes", "Probabilities" or "Queries" switches the parser
    state; every following non-empty line is interpreted according to the
    current state.

    Returns a ``(nodes, queries)`` tuple of lists.
    """
    nodes = []
    queries = []
    input = fileinput.input()
    state = ""
    for line in input:
        # Drop every whitespace character so tokens can be split directly.
        line = "".join(line.split())
        # Section headers: blank the line so it is not parsed as data.
        if "Nodes" in line:
            line = ""
            state = "readNode"
        if "Probabilities" in line:
            line = ""
            state = "readProb"
        if "Queries" in line:
            line = ""
            state = "readQuery"
        if state == "readNode" and line:
            # Comma-separated node names, one Node per name.
            nodeNames = line.split(",")
            for name in nodeNames:
                nodes.append(Node(name))
        if state == "readProb" and line:
            if "|" in line:
                # Conditional entry: <child>|<parent>,<parent>,...=<prob>
                nodeName, other = line.split("|")
                # The first character of each name is skipped for lookup;
                # presumably a sign prefix -- TODO confirm.
                childNode = self.searchNode(nodeName[1:], nodes)
                evidence, probability = other.split("=")
                parents = evidence.split(",")
                # NOTE(review): Set is presumably sets.Set (Python 2).
                tableRow = Set([])
                for parent in parents:
                    parentNode = self.searchNode(parent[1:], nodes)
                    # Link both directions; children are stored by name.
                    childNode.parents.add(parentNode)
                    parentNode.childs.add(childNode.name)
                    # The row keeps the parent token including its prefix.
                    tableRow.add(parent)
                childNode.table.append([tableRow, probability])
            else:
                # Unconditional entry: <node>=<prob>
                nodeName, probability = line.split("=")
                tableRow = Set([])
                node = self.searchNode(nodeName[1:], nodes)
                # Only the first character of the token goes into the row.
                tableRow.add(nodeName[0])
                node.table.append([tableRow, probability])
        if state == "readQuery" and line:
            if "|" in line:
                # Query with evidence: <q1>,<q2>|<e1>,<e2>
                query = Query()
                queryNodes, evidenceNodes = line.split("|")
                for node in queryNodes.split(","):
                    query.queries.append(node)
                for node in evidenceNodes.split(","):
                    query.evidence.append(node)
                queries.append(query)
            else:
                # Query without evidence.
                query = Query()
                for node in line.split(","):
                    query.queries.append(node)
                queries.append(query)
    return nodes, queries
def getAllUsers():
    """Load every row of the USERS table.

    The accumulator was created as ``{}`` (a dict) and then filled with
    ``.add``, which dicts do not have, so the first row raised
    ``AttributeError``. It is now a list filled with ``append``, matching
    the other ``getAll*`` loaders.

    Returns a list with the value of ``User.loadFromResult`` per row.
    """
    users = []
    results = Query.getAllResults(
        "select * from " + Query.getDBName() + ".USERS")
    for result in results:
        user = User()
        users.append(user.loadFromResult(result))
    return users
def getAllCoordinates():
    """Load every row of the COORDINATES table.

    Returns a list with the value of ``Coordinate.loadFromResult`` for each
    row returned by ``Query.getAllResults``.
    """
    sql = "select * from " + Query.getDBName() + ".COORDINATES"
    return [Coordinate().loadFromResult(row)
            for row in Query.getAllResults(sql)]
def getAllForBuilding(building_id):
    """Load every coordinate row that belongs to one building.

    building_id -- spliced into the WHERE clause with ``+``, so it must
                   already be a string.

    Returns a list with the value of ``Coordinate.loadFromResult`` per row.
    """
    # NOTE(review): building_id is concatenated straight into the SQL text;
    # confirm callers never pass unsanitised user input.
    sql = ("select * from " + Query.getDBName() +
           ".COORDINATES where BUILDING_ID = '" + building_id + "'")
    rows = Query.getAllResults(sql)
    return [Coordinate().loadFromResult(row) for row in rows]
def getAllSchedules():
    """Load every row of the SCHEDULES table.

    Returns a list with the value of ``Schedule.loadFromResult`` for each
    row returned by ``Query.getAllResults``.
    """
    sql = "select * from " + Query.getDBName() + ".SCHEDULES"
    return [Schedule().loadFromResult(row)
            for row in Query.getAllResults(sql)]
def get(self, query):
    """Run a search and return up to 50 distinct results with timing.

    Builds a ``Query`` from the module-level ``index``, ``page_rank``,
    ``mapping`` and ``stop_words``, retrieves a heap of hits and pops it
    until 50 distinct entries (by ``heap_item[1]``) have been mapped
    through ``mapping`` or the heap runs out.

    The module globals ``current_heap``, ``length``, ``times`` and
    ``words`` are overwritten as a side effect.

    Returns ``{'data': [...], 'time': seconds, 'len': count}``.
    """
    global current_heap
    global length
    global times
    global words
    hits = []
    words = set()
    q = Query(query, index, page_rank, mapping, stop_words)
    start = time.time()
    current_heap = q.retrieve_query()
    for _ in range(50):
        try:
            # Discard heap entries whose key has already been reported.
            while current_heap[0][1] in words:
                heappop(current_heap)
            key = current_heap[0][1]
            words.add(key)
            hits.append(mapping[key])
            heappop(current_heap)
        except Exception:
            # Best-effort stop (typically the heap is exhausted). Narrowed
            # from a bare ``except`` so KeyboardInterrupt / SystemExit are
            # no longer swallowed.
            break
    end = time.time()
    length = len(hits)
    times = end - start
    return {'data': hits, "time": times, "len": length}
def getAllCoursesForUserName(user_id):
    """Load every course row whose ``user_id`` column matches.

    user_id -- converted with ``str()`` and appended unquoted to the WHERE
               clause.

    Returns a list with the value of ``Course.loadFromResult`` per row.
    """
    sql = ("select * from " + Query.getDBName() +
           ".COURSES where user_id = " + str(user_id))
    return [Course().loadFromResult(row) for row in Query.getAllResults(sql)]
def getAllBuildings():
    """Load every row of the BUILDINGS table.

    Returns a list with the value of ``Building.loadFromResult`` for each
    row returned by ``Query.getAllResults``.
    """
    sql = "select * from " + Query.getDBName() + ".BUILDINGS"
    rows = Query.getAllResults(sql)
    return [Building().loadFromResult(row) for row in rows]
def testRead(self):
    """Check that a chained SELECT renders to the expected SQL text.

    The same statement is built twice: once with the columns given as a
    list and once as a single comma-separated string. Both must render
    identically.
    """
    expected = (
        "SELECT id, name FROM table WHERE id = 0 AND deleted = 0 ORDER BY"
        " main DESC LIMIT 0, 10"
    )
    for columns in (["id", "name"], "id, name"):
        statement = Query().select(columns).from_table("table")
        statement = statement.where("id = 0").and_("deleted = 0")
        statement = statement.order("main DESC").limit(0, 10)
        self.assertEqual(str(statement), expected)
def loadFromID(self, id):
    """Populate this object from the COURSES row with the given ID.

    id -- primary key value; converted with ``str()`` and appended
          unquoted to the WHERE clause.

    Returns ``self`` when a row was found and loaded, ``None`` otherwise.

    A leftover debug ``print self.id`` was removed: it printed the
    object's previous id (not the requested one) on every call.
    """
    result = Query.getOneResult(
        "select * from " + Query.getDBName() +
        ".COURSES where ID = " + str(id))
    if result is None:
        return
    self.loadFromResult(result)
    return self
def test2_update_dimension_3():
    """Assemble the SQL script for the third dimension-update scenario.

    The script inserts a default row into ``lkp`` when it is empty, then
    appends an SCD1 update and an update-from statement sourced from
    ``table``. ``dbms``, ``lkp`` and ``table`` are module-level objects.

    Returns the concatenated statements as a single string.
    """
    default_row = Query(dbms=dbms,
                        columns=[Column("1", "numeric(1)"),
                                 Column("'DESCONOCIDO'", "varchar()")])
    row_count = Query(dbms=dbms,
                      sources=[lkp],
                      columns=[Column(dbms.count("TEST_ID"), "numeric(3)")])

    # Pieces are evaluated left to right, in the original statement order.
    parts = [
        "IF (" + row_count.code() + ") = 0 BEGIN\n",
        lkp.insert(query=default_row),
        "END\n",
        lkp.update_scd1(source=table,
                        join_conditions=[["CauBajCod = TEST_ID"]]),
        lkp.update_from(
            columns=[lkp.columns["TEST_DESC"]],
            source=table,
            where=[table.columns["CauBajCod"].equals(lkp.columns["TEST_ID"])]),
    ]
    return "".join(parts)
def loadFromAll(self, username, schedule_name):
    """Populate this schedule from its (username, schedule name) pair.

    The previous query compared USERNAME against the literal ``'******'``
    and never used the ``username`` argument; the argument is now part of
    the WHERE clause.

    username      -- owner of the schedule (string, concatenated into SQL).
    schedule_name -- name of the schedule (string, concatenated into SQL).

    Returns ``self``. When no row matches, only ``username`` and
    ``schedule_name`` are set on the object.
    """
    # NOTE(review): both values are concatenated into the SQL text; switch
    # to bound parameters if the Query API allows it.
    SQL = ("select * from " + Query.getDBName() +
           ".SCHEDULES where USERNAME = '" + username +
           "' and SCHEDULE_NAME = '" + schedule_name + "'")
    result = Query.getOneResult(SQL)
    if result is not None:
        self.loadFromResult(result)
    self.username = username
    self.schedule_name = schedule_name
    return self
def run_retrieval_models_tasks(config, inverted_index, indexer, root_dir,
                               top_k=10, judge_queries=None):
    """Run every configured retrieval model over the evaluation queries.

    Reads the queries and the trecrun task configuration from
    ``<root_dir>/evaluation``. For each task it ranks documents for every
    query, writes the trecrun file and the per-model judgments file, and
    adds to the combined ``judgments.txt`` (truncated once at the start).

    top_k         -- forwarded to the judgment-file generators.
    judge_queries -- forwarded to the judgment-file generators; defaults
                     to ``[3]``. The default used to be a shared mutable
                     list literal in the signature; a fresh list is now
                     created per call.
    """
    if judge_queries is None:
        judge_queries = [3]

    evaluation_dir = root_dir + '/evaluation/'
    final_judgments_file_name = evaluation_dir + 'judgments.txt'
    # Truncate the combined judgments file before any task runs.
    open(final_judgments_file_name, 'w').close()

    # Read the retrieval model queries from disk
    with open(evaluation_dir + 'queries_retrieval_model.txt', 'r') as f:
        queries = f.read().split('\n')
    with open(evaluation_dir + 'trecrun_configs.json', 'r') as f:
        trecrun_configs = json.load(f)

    oit_identifier = trecrun_configs['oitIdentifier']
    trecrun_output_format = trecrun_configs['outputFormat']
    tasks = trecrun_configs['tasks']

    for task in tasks:
        retrieval_model = task['retrievalModelName']
        retrieval_model_method = task['retrievalModelMethod']
        retrieval_model_args = task['params']
        # Parameter values become a "-v1-v2" suffix of the run tag.
        params = '-'.join(str(arg) for arg in retrieval_model_args.values())
        if params:
            params = '-' + params

        query_index = Query(config, inverted_index, mode='doc',
                            retrieval_model=retrieval_model,
                            count=inverted_index.get_total_docs(),
                            **retrieval_model_args)
        run_tag = oit_identifier + '-' + retrieval_model_method + params
        query_results = []
        for i, query in enumerate(queries):
            query_results.append({
                'query': query,
                'topic_number': i + 1,
                'run_tag': run_tag,
                'docs': query_index.get_documents(query),
            })

        trecrun_file_name = (evaluation_dir + retrieval_model_method +
                             trecrun_output_format)
        generate_trecrun_file(trecrun_file_name, query_results)

        scenes = get_scenes(indexer.load_data())
        trecrun_judgments_file_name = (evaluation_dir +
                                       retrieval_model_method +
                                       '_judgments.txt')
        generate_trecrun_judgments_file(trecrun_judgments_file_name,
                                        query_results, scenes, top_k,
                                        judge_queries)
        generate_final_judgments_file(final_judgments_file_name,
                                      query_results, top_k, judge_queries)
def save(self):
    """Persist this building: UPDATE when already stored, else INSERT.

    Every field is concatenated into the statement as a quoted string, so
    ``id``, ``name``, ``phone_number`` and ``hours`` must all be strings.
    ``in_DB`` is set to True once the statement has been executed.
    """
    table = Query.getDBName() + ".BUILDINGS"
    if self.in_DB:
        # Row exists: rewrite the editable columns.
        statement = ("update " + table +
                     " set NAME = '" + self.name +
                     "', PHONE_NUMBER = '" + self.phone_number +
                     "', HOURS = '" + self.hours +
                     "' where ID = '" + self.id + "'")
    else:
        # New row.
        statement = ("insert into " + table +
                     " (ID, NAME, PHONE_NUMBER, HOURS) values('" +
                     self.id + "', '" + self.name + "', '" +
                     self.phone_number + "', '" + self.hours + "')")
    Query.execute(statement)
    self.in_DB = True
def save(self):
    """Persist this user: UPDATE the password when stored, else INSERT.

    Two defects are fixed here:

    * The UPDATE wrote the literal ``'******'`` as password for the user
      literally named ``'******'``. It now uses ``self.password`` and
      ``self.username``.
    * ``in_DB`` was set before the INSERT ran, so a failed insert left the
      object marked as stored. The flag is now set after the statement
      has executed.
    """
    # NOTE(review): the update only touches PASSWORD, as before; confirm
    # whether SALT must be rewritten together with it.
    if self.in_DB:
        SQL = ("update " + Query.getDBName() +
               ".USERS set PASSWORD = '" + self.password +
               "' where USERNAME = '" + self.username + "'")
    else:
        SQL = ("insert into " + Query.getDBName() +
               ".USERS (USERNAME, PASSWORD, SALT) values('" +
               self.username + "', '" + self.password + "', '" +
               self.salt + "')")
    Query.execute(SQL)
    self.in_DB = True
def parse(self):
    """Parse the query/document file at ``self.queryDocPath``.

    The file is read line by line until EOF or the first blank line. Each
    ``query`` line starts a new ``Query``; each ``url`` line starts a new
    ``Document`` attached to the current query; the other recognised
    prefixes (``title``, ``body_hits``, ``body_length``, ``anchor_text``)
    fill in the current document. An ``anchor_text`` line consumes the
    following line as its count.

    Side effects: resets and recomputes ``docs``, ``titles``, ``anchors``
    and the ``avg_*`` attributes on ``self``.

    Changes against the previous version:

    * the file is closed deterministically (``with``);
    * ``avg_anchors_per_doc`` is reset with the other accumulators (it was
      incremented without being initialised here);
    * averages are 0.0 when no documents/titles were seen instead of
      raising ``ZeroDivisionError``.

    Returns the list of parsed ``Query`` objects.
    """
    def mean(total, count):
        # Average that tolerates an empty input file.
        return total * 1.0 / count if count else 0.0

    queries = []
    c_query = None
    c_doc = None
    self.avg_anchor_length = 0
    self.avg_title_length = 0
    self.avg_body_length = 0
    self.avg_anchors_per_doc = 0
    self.docs = 0
    self.titles = 0
    self.anchors = 0

    with open(self.queryDocPath, 'r') as f:
        line = f.readline().strip()
        # readline() returns "" at EOF; a blank line also ends parsing.
        while line:
            if line.startswith('query'):
                c_query = Query(self.idf)
                queries.append(c_query)
                c_query.set_query(line.split(':')[1])
            elif line.startswith('url'):
                self.docs += 1
                # Finalise the previous document before starting a new one.
                if c_doc is not None:
                    c_doc.calculate_vectors()
                c_doc = Document(c_query)
                c_query.add_doc(c_doc)
                c_doc.set_url(line)
            elif line.startswith('title'):
                c_doc.set_title(line.split(':')[1])
                self.avg_title_length += len(c_doc.title_terms)
                self.titles += 1
            elif line.startswith('body_hits'):
                fields = line.split(':')[1].strip().split()
                c_doc.add_body_hits(fields[0], fields[1:])
            elif line.startswith('body_length'):
                c_doc.set_body_length(line.split(':')[1])
                self.avg_body_length += c_doc.body_length
            elif line.startswith('anchor_text'):
                text = line.split(':')[1].strip()
                # The count for this anchor text is on the next line.
                line = f.readline()
                count = line.split(':')[1].strip()
                c_doc.add_anchor_text(text, count)
                self.avg_anchor_length += int(count)
                self.avg_anchors_per_doc += int(count)
            line = f.readline().strip()

    if c_doc is not None:
        c_doc.calculate_vectors()

    # Average anchor length treats all anchor words of a document as one
    # big document: total anchor occurrences divided by number of docs.
    self.avg_anchor_length = mean(self.avg_anchor_length, self.docs)
    self.avg_title_length = mean(self.avg_title_length, self.titles)
    self.avg_body_length = mean(self.avg_body_length, self.docs)
    self.avg_anchors_per_doc = mean(self.avg_anchors_per_doc, self.docs)
    return queries
def set_test_vals(self, char_limit):
    """Record how a throwaway ``Query`` interprets ``char_limit``.

    A query is created for a placeholder URL, ``choose_char_limit`` is
    applied, and the resulting ``is_limit``, ``less_than`` and
    ``char_limit`` attributes are stored in ``self.query_vals``.
    """
    probe = Query("www.irrelevant.com", 0)
    probe.choose_char_limit(char_limit)

    observed = {}
    observed["is_limit"] = probe.is_limit
    observed["less than"] = probe.less_than
    observed["value"] = probe.char_limit
    self.query_vals = observed
def loadFromID(self, id):
    """Populate this building, including its floor maps, from its ID.

    id -- primary key value; concatenated as a quoted string, so it must
          be a string.

    Returns ``self`` when the row exists, ``None`` otherwise. On success
    the ``floor_map`` of every floor of the building is appended to
    ``self.floorPlans``.
    """
    sql = ("select * from " + Query.getDBName() +
           ".BUILDINGS where ID = '" + id + "'")
    row = Query.getOneResult(sql)
    if row is not None:
        self.loadFromResult(row)
        for floor in Floor.getAllForBuilding(id):
            self.floorPlans.append(floor.floor_map)
        return self
    return None
def load_categories():
    """
    Fetch all categories and expose each one as an attribute of the
    Category class, named after the category.

    Call again whenever a category is added or removed.
    """
    query = Query(Session())
    for category in query.get_categories():
        setattr(Category, category.name, category)
def requestContent(self, fid):
    """Request one content item, directly if its holder is known.

    When the resource table has a record for ``fid`` a ``Query`` message
    is sent to that record's address. Otherwise a ``Discovery`` message is
    sent with no explicit destination.
    """
    holder = next(
        (rec for rec in self.resourceTable.records if rec.fid == fid),
        None)

    if holder is None:
        # Unknown holder: ask for it through discovery.
        message = Discovery()
        message.addRecord(fid)
        message.send(None, self.port, self.interface)
        return

    message = Query()
    message.addRecord(fid)
    message.send((holder.ip_addr, self.port), self.port, self.interface)
def fetchMetaFromGithub(commit_hash, project_name, self2, url):
    """Fetch commit metadata from GitHub and have the meta service build it.

    Used when no cached result exists. The commit is queried through
    ``Query(url)``. On success the files are posted to
    ``Api.GENERATE_META`` and the commit hash is recorded in the commit
    cache.

    self2 -- HTTP request handler; a 200 status line and the end of the
             headers are written to it before the files are posted.

    Returns a ``(ok, payload)`` tuple on every path: ``(True, content)``
    with the service response body, or ``(False, reason)`` where
    ``reason`` is a message (``None`` when the failure is not recognised).

    Changes against the previous version: the success check uses ``!=``
    instead of the identity test ``is not`` (consistent with the ``==``
    comparison used for ``Message.internet_error``), and an unused
    ``Result`` object is no longer created.
    """
    # No cache entry: ask GitHub for the meta information.
    query = Query(url)
    status_code, message, content = query.query()

    if status_code == -1:
        # Invalid URL: the server was never contacted.
        return False, "invalid github commit url"

    if status_code != 200:
        if message == Message.internet_error:
            return False, "internet error with github"
        return False, None

    if message != Message.success:
        return False, message

    file_list = content[0]
    meta = content[1]
    if not checkFileRaw(file_list):
        return False, "message internet error with github"

    self2.send_response(200)
    self2.end_headers()

    # All files are available at this point: build the multipart payload
    # and hand it to the meta-generation service.
    multipart_encoder = DataNet.initData(file_list, meta)
    r = requests.post(
        Api.GENERATE_META,
        data=multipart_encoder,
        headers={'Content-Type': multipart_encoder.content_type})
    if r.status_code != 200:
        return False, "connection error with cldiff"

    # Request finished: remember the commit in the cache.
    cache = CommitCache()
    cache.add_commit_hash(commit_hash, project_name)
    return True, r.content
def update_keys(self, keywords):
    """Apply the base-class key update, then register this query's keys.

    Registers the NAMES, PATH, SIZE and CHANNEL keys on ``OUTPUT.INFO``,
    the FORMAT key on ``INPUT.INFO``, and the dataset-query keys CHANNELS
    and DATASET on ``OUTPUT.INFO``, in that order.
    """
    Query.update_keys(self, keywords)

    for key in ('NAMES', 'PATH', 'SIZE', 'CHANNEL'):
        self.set_key(self.OUTPUT.INFO, key)

    self.set_key(self.INPUT.INFO, 'FORMAT')

    # For the dataset queries
    for key in ('CHANNELS', 'DATASET'):
        self.set_key(self.OUTPUT.INFO, key)
def search(queries_path, dictionary_path, postings_path, output_path):
    """
    Rank patents for the queries, expand the query with the initial
    ranking, and write the final ranking to ``output_path``.

    The module globals ``dictionary`` and ``patent_info`` are (re)loaded as
    a side effect. ``postings_path`` is accepted but not used here.

    Returns the space-separated ranking that was written.
    """
    global patent_info, dictionary
    dictionary = read_dictionary(dictionary_path)
    patent_info = util.load_dictionary(PATENT_INFO_PATH)

    base_query = Query(queries_path, dictionary, patent_info)
    first_pass = base_query.get_ranked_docs()
    second_pass = ExpandedQuery(base_query, first_pass, patent_info)
    result = ' '.join(second_pass.get_ranked_docs())

    with codecs.open(output_path, 'w', encoding='utf-8') as out_file:
        out_file.write(result)
    return result
def update_keys(self, keywords):
    """Apply the base-class key update, then register this query's keys.

    Keys are registered one after another in a fixed order: four output
    info keys, the input FORMAT key, then the two dataset-query output
    keys.
    """
    Query.update_keys(self, keywords)

    registration_order = (
        ('OUTPUT', 'NAMES'),
        ('OUTPUT', 'PATH'),
        ('OUTPUT', 'SIZE'),
        ('OUTPUT', 'CHANNEL'),
        ('INPUT', 'FORMAT'),
        # For the dataset queries
        ('OUTPUT', 'CHANNELS'),
        ('OUTPUT', 'DATASET'),
    )
    for group, key in registration_order:
        self.set_key(getattr(self, group).INFO, key)
def runCMD():
    """Interactive prompt: read queries until the user types ``halt``.

    Each line is turned into a ``Query``; its answers are printed one per
    line, followed by ``True``/``False`` for whether any answer exists.
    Anything that goes wrong while evaluating a query prints
    ``Wrong query``.

    The handler used to be a bare ``except:``, which also swallowed
    ``KeyboardInterrupt`` and ``SystemExit`` raised during evaluation; it
    is now limited to ``Exception``.
    """
    while True:
        question = input('?- ')
        if question == 'halt':
            break
        try:
            subs = Query(question).getAns()
            if subs:
                print(*subs, sep='\n')
            print(len(subs) > 0)
        except Exception:
            print('Wrong query')
def main(): #All Crawling here! crawlSpider() invIndex = InvertedIndex() invIndex.loadPickles() queryObj = Query() os.system("clear") #Infinite loop while (1): print "" print "" print "" # invIndex.createTermFrequencyMatrix() queryObj.query = raw_input("Please enter a query for zackSpider: ") print "Your query is:", queryObj.query returnDocs = queryObj.parseQuery(queryObj.query, invIndex.inverted_index) if (returnDocs > 0): returnedDocs = sorted(returnDocs.items(), key=itemgetter(1), reverse=True) os.system("clear") print "" print "" print "" print "The following documents are ranked from highest to lowest similarity for your query: " print "---------------------------------------------------------------------------------------" print "{:<5} {:<15} {:<55} {:<10}".format('Doc', 'Similarity', 'Url', 'Preview') for key in returnedDocs: docKey = key[0] - 1 doc = invIndex.collections_index[docKey] sim = key[1] print "{:<5} {:<15.10f} {:<55} {:<10}".format( docKey, sim, doc[0], doc[1]) print "" print "" else: print "No results." print "" print ""
def __init__(self, *args, **keywords):
    """Create a tile-level query derived from an existing query.

    ``args`` must be exactly ``(query, zyx_index, kji_pixels)``. The
    tile index and pixel values are stored in the runtime tile settings,
    and the output, datasource and resolution settings are copied from
    ``query``. ``keywords`` are forwarded to ``Query.__init__``.
    """
    Query.__init__(self, **keywords)

    # Raises ValueError unless exactly three positional values are given.
    query, zyx_index, kji_pixels = args

    # Map each configured source name to its implementation, by position.
    self.source_list = self.RUNTIME.IMAGE.SOURCE.LIST
    self.SOURCES = {
        self.source_list[0]: HDF5,
        self.source_list[1]: BossGrid,
        self.source_list[2]: Mojo,
        self.source_list[3]: ImageStack,
    }

    self.RUNTIME.TILE.ZYX.VALUE = zyx_index
    self.RUNTIME.TILE.KJI.VALUE = kji_pixels
    self.RUNTIME.TILE.SCALES.VALUE = query.scales

    # Set blocksize from the query
    self.blocksize = query.blocksize

    # Get the right blocksize, datatype, and path
    q_type = query.OUTPUT.INFO.TYPE.VALUE
    q_path = query.OUTPUT.INFO.PATH.VALUE

    # Set the right blocksize, datatype, and path
    self.OUTPUT.INFO.TYPE.VALUE = q_type
    self.OUTPUT.INFO.PATH.VALUE = q_path

    # Very important to get the right datasource
    query_source = query.RUNTIME.IMAGE.SOURCE
    self_source = self.RUNTIME.IMAGE.SOURCE
    self_source.VALUE = query_source.VALUE

    # Only applies to HDF5 datasource
    query_h5 = query.RUNTIME.IMAGE.SOURCE.HDF5
    self_h5 = self.RUNTIME.IMAGE.SOURCE.HDF5
    self_h5.VALUE = query_h5.VALUE

    # Only applies to Mojo datasource
    query_format = query.RUNTIME.IMAGE.SOURCE.MOJO.FORMAT
    self_format = self.RUNTIME.IMAGE.SOURCE.MOJO.FORMAT
    self_format.VALUE = query_format.VALUE

    # Only applies to Boss datasource
    # (the BOSS object itself is shared with the query, not copied)
    self_source.BOSS = query_source.BOSS

    # Get the XY resolution for Mojo
    query_xy = query.INPUT.RESOLUTION.XY
    self_xy = self.INPUT.RESOLUTION.XY
    self_xy.VALUE = query_xy.VALUE
def load_status_singletons():
    """
    Fetch the master instances of every status/type family and attach each
    one to its class as an attribute named after the instance.

    Can be called again if the status types change, but is expected to run
    once per instance.
    """
    query = Query(Session())

    # (query method, class receiving the attributes, attribute holding
    # the name), processed in this order.
    families = (
        ("get_post_statuses", PostStatus, "status"),
        ("get_approval_statuses", Approval, "status"),
        ("get_author_statuses", AuthorStatus, "status"),
        ("get_comment_statuses", CommentStatus, "status"),
        ("get_post_types", PostType, "type"),
        ("get_trackback_statuses", TrackbackStatus, "status"),
    )
    for getter_name, holder, name_attr in families:
        for master in getattr(query, getter_name)():
            setattr(holder, getattr(master, name_attr), master)
def update_keys(self, keywords):
    """Apply the base-class key update, then register this query's keys.

    Position keys are registered with a third ``set_key`` argument of 0
    (Z, Y, X) or 1 (DEPTH, HEIGHT, WIDTH). Image keys and the output PATH
    are registered without it, and the XY resolution with 0.
    """
    Query.update_keys(self, keywords)

    position_groups = (
        (('Z', 'Y', 'X'), 0),
        (('DEPTH', 'HEIGHT', 'WIDTH'), 1),
    )
    for keys, third_arg in position_groups:
        for key in keys:
            self.set_key(self.INPUT.POSITION, key, third_arg)

    for key in ('VIEW', 'FORMAT', 'OFFSET'):
        self.set_key(self.INPUT.IMAGE, key)

    self.set_key(self.OUTPUT.INFO, 'PATH')
    self.set_key(self.INPUT.RESOLUTION, 'XY', 0)
def test_init(self):
    """Query must expose the terms and dates it was constructed with."""

    def check(terms, dates):
        # Build a query and verify both lists come back unchanged.
        query = Query(terms, dates)
        self.assertEqual(query.terms, terms)
        self.assertEqual(query.dates, dates)

    # Empty query.
    check([], [])

    # Text terms only.
    check(
        [
            QueryComponent("text", "german", QueryOperator.EQUALS),
            QueryComponent("text", "german", QueryOperator.LESS_THAN),
        ],
        [],
    )

    # Text terms built with the fourth constructor argument set to False.
    check(
        [
            QueryComponent("text", "german", QueryOperator.EQUALS, False),
            QueryComponent("text", "german", QueryOperator.GREATER_THAN, False),
        ],
        [],
    )

    # Dates only.
    check(
        [],
        [
            QueryComponent("date", "2011/01/01", QueryOperator.LESS_THAN),
            QueryComponent("date", "2012/02/01", QueryOperator.GREATER_THAN),
        ],
    )

    # Terms and dates together.
    check(
        [
            QueryComponent("text", "german", QueryOperator.EQUALS),
            QueryComponent("text", "german", QueryOperator.GREATER_THAN),
            QueryComponent("text", "german", QueryOperator.EQUALS, False),
            QueryComponent("text", "german", QueryOperator.GREATER_THAN, False),
        ],
        [
            QueryComponent("date", "2011/01/01", QueryOperator.LESS_THAN),
            QueryComponent("date", "2012/02/01", QueryOperator.GREATER_THAN),
        ],
    )
def test_2_update_dimension_1():
    """Build the SCD2 update of ``Lkp_Person`` from the ``Person`` table.

    A source table (Name, Age) and an SCD2 lookup dimension (Person_Name,
    Person_Age plus validity and date columns) are declared, and the
    lookup is updated from a query over the source, joined on the name.
    ``tdms`` and ``sql`` are module-level objects.

    Returns whatever ``update_scd2`` returns.
    """
    src_name = Column('Name', 'varchar(20)', False)
    src_age = Column('Age', 'numeric(2)', False)
    table = Table(tdms, 'Person', [src_name, src_age], [src_name.name])

    lkp_name = Column('Person_Name', 'varchar(20)', False)
    lkp_age = Column('Person_Age', 'numeric(2)', False)
    column_valid = Column('Valid_Record', 'numeric(1)', False)
    column_init_date = Column('Init_Date', 'date', False)
    column_end_date = Column('End_Date', 'date')
    lkp = SCDimension2(sql, 'Lkp_Person', [lkp_name, lkp_age],
                       column_valid, column_init_date, column_end_date,
                       [lkp_name.name])

    source_columns = [table.columns["Name"], table.columns["Age"]]
    query = Query(dbms=sql, sources=[table], columns=source_columns,
                  alias='')

    name_match = lkp.columns['Person_Name'].equals(query.columns['Name'])
    return lkp.update_scd2(source=query, join_conditions=[[name_match]])
def run(self, q=Query()):
    '''
    Fallback for an unrecognised operation: dispatch a hint pointing the
    user at 'help'.

    @param q: Query class (not used here). The default instance is created
              once, when the function is defined.
    '''
    message = "The operation not exists, 'help' to know the list of all operations"
    self.dispatch(message)
def startUserQuery(self): while True: query = Query() if query.isLeave(): print '\nGoodbye!' break # rank is a list of tuple (doc_name, similarity) # sorted by similarity in decreasing order. rank = self.processQuery(query) self.printSystemInfo() print print 'RANK'.ljust(10), 'DOC'.ljust(10), 'SIMILARITY' for index, item in enumerate(rank): print str(index).ljust(10), item[0].ljust(10), print str(format(item[1], '.2f')).ljust(10)
def run(self, q=Query()):
    '''
    Dispatch the wget command line(s) for the files selected by the last
    query.

    With no arguments the current selection is used as is. A first
    argument of "find" or "selectrow" runs that operation on ``q`` with
    the remaining arguments before dispatching. Any other first argument
    is reported as unknown. Exceptions are dispatched instead of raised.

    @param q: Query class. The default instance is created once, when the
              function is defined.
    '''
    try:
        if len(self.args) == 0:
            self.dispatch(self.wget(q))
            return

        # Nested operation: pick the operation to run first.
        keyword = self.args[0]
        if keyword == "find":
            nested = FindOperation()
        elif keyword == "selectrow":
            nested = SelectrowOperation()
        else:
            self.dispatch("Element after 'wget' is unknown")
            return

        nested.set_args(self.args[1:])
        nested.run(q)
        self.dispatch(self.wget(q))
    except Exception as e:
        self.dispatch(e)
def run(self, q=Query()):
    '''
    Describe the data set: with arguments, list the values of each named
    field; without arguments, dispatch the description of all fields.

    @param q: Query class. The default instance is created once, when the
              function is defined.
    '''
    if len(self.args) > 0:
        for field in self.args:
            self.get_describe(q, field)
    else:
        self.do_describe(q)
def getDistributedQueries(testset, numQueries): numCategories = len(testset.target_names) queriesPerTag = numQueries / numCategories i = 0 j = 0 tagTestsMap = {} queryList = [] while j < numQueries: if i > (len(testset.data) - 1): print "your queries can not be evenly distributed" return None categoryNum = testset.target[i] file = testset.data[i] category = testset.target_names[categoryNum] if tagTestsMap.has_key(category): tagTests = tagTestsMap[category] if tagTests >= queriesPerTag: i += 1 continue tagTestsMap[category] += 1 else: tagTestsMap[category] = 1 query = Query(testset.filenames[i], file, category) i += 1 j += 1 queryList.append(query) return queryList
def construir_query(cadena):
    """Build a ``Query`` from its textual form.

    cadena -- the query text.

    Returns the ``Query``, or ``None`` (after printing a message) when the
    text cannot be parsed.

    The handler used to be a bare ``except:``, which also swallowed
    ``KeyboardInterrupt`` and ``SystemExit``; it is now limited to
    ``Exception``.
    """
    try:
        return Query(cadena)
    except Exception:
        print("Query en formato incorrecto")
        return None
def run(self, q=Query()):
    '''
    Download every file selected by the last query into the configured
    path (or ``self.default_path`` when none is configured).

    Progress is dispatched as "<position>/<total>" before each download.
    A failing download is reported and the loop continues.

    The position used to come from ``query.index(row)``, a linear search
    per row that also reports the first occurrence for duplicate rows. It
    is now counted with ``enumerate``.

    @param q: Query class. The default instance is created once, when the
              function is defined.
    '''
    name_index = q.get_index("fname")
    size_index = q.get_index("size")
    dataset_index = q.get_index("dataset")
    url = q.get_url()
    query = q.do_query(query=q.get_last_query())

    # Use the path from the configuration, else the default one.
    path = q.get_path()
    if path == "":
        path = self.default_path

    total = len(query)
    for position, row in enumerate(query, 1):
        # for example: 'https://example.com/file.ext'
        tmp_url = url + row[name_index]
        # for example: '/Download/Dataset_name/'
        tmp_path = path + row[dataset_index]
        try:
            # Progress indicator: current file / total files.
            self.dispatch("\n" + str(position) + "/" + str(total))
            # Resume or start the download.
            self.download_with_resume(tmp_url, tmp_path, row[size_index])
        except Exception as e:
            self.dispatch(str(e) + "\n")
    self.dispatch("\nDone...Downloading is complete...\n")
def test_eq(self):
    """Queries compare equal exactly when their terms and dates match."""
    EQ = QueryOperator.EQUALS
    LT = QueryOperator.LESS_THAN

    def dated(operator):
        # Query with no terms and a single date component.
        return Query(
            [],
            [QueryComponent("date", "2011/02/21", operator)])

    def full(second_word, date_operator):
        # Query with two text terms and a single date component.
        return Query(
            [QueryComponent("text", "hey", EQ),
             QueryComponent("text", second_word, LT, False)],
            [QueryComponent("date", "2011/02/21", date_operator)])

    # Identical content compares equal.
    self.assertEqual(Query([], []), Query([], []))
    self.assertEqual(dated(EQ), dated(EQ))
    self.assertEqual(full("a", EQ), full("a", EQ))

    # A different date operator breaks equality.
    self.assertNotEqual(full("a", LT), full("a", EQ))

    # A different term value breaks equality.
    self.assertNotEqual(full("abc", EQ), full("a", EQ))
def run(self, q=Query()):
    '''
    Dispatch every entry stored in log.txt, one per line.

    @param q: Query class (not used here). The default instance is created
              once, when the function is defined.
    '''
    story = Document("log.txt").get_params()
    for key in story:
        entry = story[key]
        self.dispatch(entry + "\n")
def do_describe(self, q=Query()):
    '''
    Dispatch the query's description as one newline-terminated block.
    Any exception is dispatched instead of raised.

    @param q: Query class. The default instance is created once, when the
              function is defined.
    '''
    try:
        lines = [entry + "\n" for entry in q.do_describe()]
        self.dispatch("".join(lines))
    except Exception as err:
        self.dispatch(err)
def get_describe(self, q=Query(), field=""):
    '''
    Dispatch a header followed by each distinct value of one field.
    Any exception is dispatched instead of raised.

    @param q: Query class. The default instance is created once, when the
              function is defined.
    @param field: name of the field to select
    '''
    try:
        rows = q.do_query(select=field)
        self.dispatch("All values of " + field + ":\n")
        distinct_rows = set(rows)
        for row in distinct_rows:
            self.dispatch(row[0])
    except Exception as err:
        self.dispatch(err)
def __init__(self, parent=None):
    """Set up the object with its query helper and tree timer.

    parent -- forwarded to the base-class constructor and kept on
              ``self.parent``; may be None.
    """
    super().__init__(parent)
    self.parent = parent
    # Both helpers receive this object as their constructor argument.
    self.query = Query(self)
    self.tree_timer = TreeTimer(self)
    # Runs last, after the helpers above exist.
    self.initialize()
def db_create(son):
    """
    Build a PathElement from its database SON representation.

    Called by transform_outgoing of the SONManipulator. The referenced
    query is looked up by ``son['query_id']`` and the condition is rebuilt
    from ``son['condition']``.
    """
    query = Query.find_by_id(son['query_id'])
    condition = Condition.from_dict(son['condition'])
    return PathElement(query=query, condition=condition, _id=son['_id'])
def main(): #All Crawling here! crawlSpider(); invIndex = InvertedIndex() invIndex.loadPickles() queryObj = Query() os.system("clear") #Infinite loop while(1): print"" print"" print"" # invIndex.createTermFrequencyMatrix() queryObj.query = raw_input("Please enter a query for zackSpider: ") print "Your query is:", queryObj.query returnDocs = queryObj.parseQuery(queryObj.query, invIndex.inverted_index) if (returnDocs > 0): returnedDocs = sorted(returnDocs.items(),key=itemgetter(1), reverse=True) os.system("clear") print"" print"" print"" print "The following documents are ranked from highest to lowest similarity for your query: " print"---------------------------------------------------------------------------------------" print "{:<5} {:<15} {:<55} {:<10}".format('Doc', 'Similarity', 'Url','Preview') for key in returnedDocs: docKey = key[0]-1 doc = invIndex.collections_index[docKey] sim = key[1] print "{:<5} {:<15.10f} {:<55} {:<10}".format(docKey, sim, doc[0], doc[1]) print"" print"" else: print "No results." print"" print""
def requestContents(self, fids):
    """Request several content items, batching the unknown ones.

    For every fid with a record in the resource table a ``Query`` message
    is sent to that record's address. All other fids are collected into a
    single ``Discovery`` message, which is sent at the end if its header
    length is non-zero.
    """
    discovery = Discovery()
    for fid in fids:
        for record in self.resourceTable.records:
            if record.fid == fid:
                # Holder known: query it directly.
                message = Query()
                message.addRecord(fid)
                message.send((record.ip_addr, self.port), self.port,
                             self.interface)
                break
        else:
            # No record matched: ask for this fid through discovery.
            discovery.addRecord(fid)

    if discovery.header.length != 0:
        discovery.send(None, self.port, self.interface)
def _period_to_timedelta(period):
    """Return ``period`` seconds rounded up to its natural unit.

    Under a minute the period is kept in seconds. Longer periods are
    rounded up to whole minutes, hours, days or weeks depending on size.
    """
    if period < 60:
        return datetime.timedelta(seconds=period)
    if period < 3600:
        return datetime.timedelta(minutes=math.ceil(period / 60.0))
    if period < 86400:
        return datetime.timedelta(hours=math.ceil(period / 3600.0))
    if period < 604800:
        return datetime.timedelta(days=math.ceil(period / 86400.0))
    return datetime.timedelta(weeks=math.ceil(period / 604800.0))


def generateQueries(start, time, numQueries, accessGenerator, minPeriod,
                    maxPeriod, periodGenerator):
    """Build ``numQueries`` queries with a start and an end time.

    A start offset (seconds after ``start``) and a period (seconds) are
    drawn per query. ``startTime`` is stored as an ISO-like string and
    ``endTime`` as the start plus the period rounded up to its natural
    unit.

    The rounding used ``math.ceil(period / unit)`` with integer operands,
    which under Python 2 integer division truncates before ``ceil`` runs,
    so end times were rounded down. The division is now done in floating
    point.

    Returns the list of created ``Query`` objects.
    """
    querylist = list()
    elapsed = (time - start).total_seconds()
    accesslist = accessGenerator.generateDistribution(0, elapsed, numQueries)
    periodlist = periodGenerator.generateDistribution(
        minPeriod, maxPeriod, numQueries)
    for i in xrange(numQueries):
        q = Query(QueryGenerator.queryRunningCount, elapsed)
        QueryGenerator.queryRunningCount += 1
        newstart = start + datetime.timedelta(0, accesslist[i])
        q.startTime = newstart.strftime('%Y-%m-%dT%H:%M:%S')
        q.endTime = newstart + _period_to_timedelta(periodlist[i])
        querylist.append(q)
    return querylist
def save(self):
    """Persist this schedule: UPDATE when already stored, else INSERT.

    After an INSERT the newest row (highest ID) is read back and its first
    column stored as ``self.id``.

    Two defects are fixed in the UPDATE branch:

    * it had no WHERE clause, so it rewrote every row of SCHEDULES; it is
      now restricted to this object's ID, as the COORDINATES ``save`` does;
    * it wrote the literal ``'******'`` as USERNAME instead of
      ``self.username``.
    """
    if self.in_DB:
        SQL = ("update " + Query.getDBName() +
               ".SCHEDULES set USERNAME = '" + self.username +
               "', SCHEDULE_NAME = '" + self.schedule_name +
               "' where ID = '" + str(self.id) + "'")
        Query.execute(SQL)
    else:
        SQL = ("insert into " + Query.getDBName() +
               ".SCHEDULES (USERNAME, SCHEDULE_NAME) values('" +
               self.username + "', '" + self.schedule_name + "')")
        Query.execute(SQL)
        # Get the new ID and save it to the object.
        # NOTE(review): reading back the highest ID can pick up another
        # writer's row under concurrent inserts.
        SQL = ("select * from " + Query.getDBName() +
               ".SCHEDULES order by ID desc")
        result = Query.getOneResult(SQL)
        self.id = result[0]
    self.in_DB = True
def save(self):
    """Persist this coordinate: UPDATE when already stored, else INSERT.

    All fields are concatenated into the statement as quoted strings, so
    ``building_id``, ``latitude``, ``longitude`` (and ``id`` on update)
    must be strings. After an INSERT the newest row (highest ID) is read
    back and its first column stored as ``self.id``.
    """
    table = Query.getDBName() + ".COORDINATES"

    if self.in_DB:
        # Row exists: rewrite it in place.
        Query.execute(
            "update " + table +
            " set BUILDING_ID = '" + self.building_id +
            "', LATITUDE = '" + self.latitude +
            "', LONGITUDE = '" + self.longitude +
            "' where ID = '" + self.id + "'")
        self.in_DB = True
        return

    Query.execute(
        "insert into " + table +
        " (BUILDING_ID, LATITUDE, LONGITUDE) values('" +
        self.building_id + "', '" + self.latitude + "', '" +
        self.longitude + "')")
    # Fetch the id that was just assigned and remember it.
    newest = Query.getOneResult(
        "select * from " + Query.getDBName() +
        ".COORDINATES order by ID desc")
    self.id = newest[0]
    self.in_DB = True