import pickle


def run_search(dict_file, postings_file, queries_file, results_file):
    """Using the given dictionary file and postings file, perform searching
    on the given queries file and output the results to a file."""
    print('running search on the queries...')
    with open(dict_file, 'rb') as infile:
        new_dict = pickle.load(infile)
    with open(queries_file, 'r', encoding="utf8") as in_file, \
            open(results_file, 'w', encoding="utf8") as out_file:
        queries = in_file.read().splitlines()
        # empty queries produce empty result lines; results are
        # newline-separated, with no trailing newline after the last one
        out_file.write('\n'.join(
            search(query, new_dict, postings_file) if query else ''
            for query in queries))
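# A minimal usage sketch for run_search; the file names below are
# illustrative assumptions, not taken from the original project.
# run_search('dictionary.txt', 'postings.txt', 'queries.txt', 'results.txt')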
def get_search_results(**kw):
    """Search the catalog and return the results.

    The request may contain additional query parameters.
    """
    query = make_query(**kw)
    return search(query)
def precision_at_k(k, num_relevant, query, search_type, operation_type,
                   input=False, incorporate_pr="yes"):
    '''
    :param k: number of retrieved documents
    :param num_relevant: number of documents to treat as relevant
    :param query: the query string
    :param search_type: ranking scheme, e.g. "tfidf"
    :param operation_type: boolean operator, e.g. "and"
    :param input: passed to search() as type_input
    :param incorporate_pr: "yes" to incorporate pr scores
    :return: precision at k, i.e. the fraction of the k retrieved
             documents that are relevant
    '''
    retrieved = search(query, search_type, operation_type, type_input=input,
                       incorporate_pr=incorporate_pr, verbose=False,
                       num_results=k)
    # getting the set of relevant documents
    relevant = util.get_nbest_results(query, num_relevant)
    print("{} relevant documents obtained".format(len(relevant)))
    # counting the relevant documents that were actually retrieved
    relevant_retrieved_docs = [doc for doc in retrieved if doc in relevant]
    num_relevant_retrieved = len(relevant_retrieved_docs)
    print("{} relevant documents retrieved".format(num_relevant_retrieved))
    return num_relevant_retrieved / k
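# Example call, mirroring the commented-out invocation further down in
# this file (after ndcg_at_k):
# precision_at_k(k=10, num_relevant=200, query="UCL",
#                search_type="tfidf", operation_type="and")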
def search():
    if not authenticator.is_auth(flask.session):
        return flask.jsonify(status="auth", message="User is not authenticated")
    # use .get() so a missing project_id is reported as a QueryException
    # instead of raising a KeyError on the lookup itself
    project_id = flask.request.form.get('project_id')
    q = flask.request.form['q']
    if project_id is None:
        raise query.QueryException("Required parameter project_id not provided")
    return flask.jsonify(status="success", q=q,
                         documents=query.summary(
                             query.search(db(),
                                          authenticator.user_id(flask.session),
                                          project_id, q)))
def post(self):
    # q is the query string; it needs to be parsed and indexed.
    q = self.get_body_argument("message")
    # Make the function calls here to get the ranked list. The list must be
    # named `results` for it to be displayed on the web.
    results = search(q)
    # end of function calls to get the ranked list
    self.set_header("Content-Type", "text/plain")
    # the list variable must be called `results`
    self.render('listTemplate.html', title="Results", results=results)
def get_search_results(**kw):
    """Search the catalog and return the results

    :returns: Catalog search results
    :rtype: list/Products.ZCatalog.Lazy.LazyMap
    """
    # allow to search for the Plone site
    if kw.get("portal_type") == "Plone Site":
        return [get_portal()]
    elif kw.get("id") in PORTAL_IDS:
        return [get_portal()]
    elif kw.get("uid") in PORTAL_IDS:
        return [get_portal()]
    # build and execute a catalog query
    query = make_query(**kw)
    return search(query)
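# Hypothetical usage: asking for the Plone site short-circuits the catalog
# query and returns the portal object directly.
# get_search_results(portal_type="Plone Site")  # -> [portal]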
def handler(event, context):
    s3 = boto3.client('s3')
    inspector = Inspector()
    inspector.inspectAll()
    inspector.addTimeStamp("FrameWorkRuntime")
    service = event.get("service")
    bucket = event.get("bucketname")
    key = event.get("filename")
    if service == 1:
        # transform: download the CSV, process it, upload the result
        filename = '/tmp/target.csv'
        processed_file = '/tmp/processed.csv'
        upload_key = 'transform.csv'
        s3.download_file(bucket, key, filename)
        processed_data = trans.process(filename)
        processed_data.to_csv(processed_file, index=False)
        s3.upload_file(processed_file, bucket, upload_key)
        inspector.addAttribute("numLine", processed_data.shape[0])
        inspector.addAttribute("outputFile", upload_key)
    elif service == 2:
        # load: build a SQLite database from the CSV and upload it
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        data_path = '/tmp/target.csv'
        db_path = '/tmp/' + key.split('.')[0] + '.db'
        s3.download_file(bucket, key, data_path)
        load.database_init(data_path, db_path, logger)
        s3.upload_file(db_path, bucket, 'target.db')
        inspector.addAttribute("DatabaseName", "target.db")
    elif service == 3:
        # query: run a filtered/grouped search against the database
        where_statement = event.get("where")
        group_statement = event.get("group")
        db_path = '/tmp/target.db'
        if not os.path.isfile(db_path):
            s3.download_file(bucket, key, db_path)
        result, lines = query.search(db_path, where_statement, group_statement)
        inspector.addAttribute("data", result)
        inspector.addAttribute("numLine", lines)
    else:
        raise NameError("There is no such service")
    inspector.inspectCPUDelta()
    return inspector.finish()
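# A sample event for service 3 (key names come from the handler above;
# the bucket/file values and clauses are placeholders):
# event = {"service": 3, "bucketname": "my-bucket", "filename": "target.db",
#          "where": "<WHERE clause>", "group": "<GROUP BY clause>"}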
def search():
    region = request.args['region']
    max_price = request.args['max']
    min_price = request.args['min']
    result = query.search(region, min_price, max_price)
    current_page = request.args.get('page', 1, type=int)
    items_per_page = 5
    # number of pages, rounded up
    pages = (len(result) + items_per_page - 1) // items_per_page
    from_page = current_page * items_per_page - items_per_page
    upto_page = current_page * items_per_page
    list_part = result[from_page:upto_page]
    images = []
    for adv in list_part:
        images.append(query.get_images(adv[0]))
    return render_template('search.html', images=images,
                           advertisements=list_part, pages=pages,
                           current_page=current_page, region=region,
                           max=max_price, min=min_price)
def submit(request):
    info = request.POST.get('info')
    url_list = search(info)
    return render(request, 'personal/home.html', {'search_string': url_list})
from math import log


def ndcg_at_k(k, num_relevant, query, search_type, operation_type,
              input=False, incorporate_pr="yes"):
    '''
    :param k: number of retrieved documents
    :param num_relevant: number of documents to treat as relevant
    :param query: the query string
    :param search_type: ranking scheme, e.g. "tfidf"
    :param operation_type: boolean operator, e.g. "and"
    :param input: passed to search() as type_input
    :param incorporate_pr: "yes" to incorporate pr scores
    :return: normalized DCG value at k
    '''
    retrieved = search(query, search_type, operation_type, type_input=input,
                       incorporate_pr=incorporate_pr, verbose=False,
                       num_results=k)
    relevant = util.get_nbest_results(query, num_relevant)
    print("{} relevant documents obtained".format(len(relevant)))
    # Define the first 10 documents as highly relevant (relevance 2), the
    # remaining documents as relevant (relevance 1), and all the rest as
    # irrelevant (relevance 0). These values might have to be changed
    # appropriately.
    num_highly_relevant = 10
    num_relevant = num_relevant - num_highly_relevant
    highly_relevant = relevant[:num_highly_relevant]
    relevant = relevant[num_highly_relevant:]
    dcg = 0
    rank = 1
    for doc in retrieved:
        # find relevance of retrieved doc
        if doc in highly_relevant:
            rel_doc = 2
        elif doc in relevant:
            rel_doc = 1
        else:
            rel_doc = 0
        # calculate discount based on rank of document
        if rank == 1:
            discount = 1
        else:
            discount = log(rank, 2)
        # add discounted gain to DCG value
        dcg += rel_doc / discount
        rank += 1
    # find the number of retrieved documents from the length of the list
    # (since it might be smaller than k)
    num_retrieved = len(retrieved)
    opt_dcg = calc_optimal_dcg(num_retrieved, num_highly_relevant, num_relevant)
    # divide by the optimal value to get normalized DCG
    ndcg = dcg / opt_dcg
    return ndcg

# print(precision_at_k(k=10, num_relevant=200, query="UCL",
#                      search_type="tfidf", operation_type="and"))
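# ndcg_at_k calls calc_optimal_dcg, which is not defined in this excerpt.
# Below is a minimal sketch, assuming the same gain scheme as above
# (relevance 2 for highly relevant documents, 1 for relevant ones, 0
# otherwise) and the same discount (1 at rank 1, log2(rank) thereafter):
# the ideal ranking places all highly relevant documents first, then the
# relevant ones. It is illustrative, not the original implementation.
def calc_optimal_dcg(num_retrieved, num_highly_relevant, num_relevant):
    # best achievable DCG over num_retrieved ranks; uses log imported above
    opt_dcg = 0
    for rank in range(1, num_retrieved + 1):
        if rank <= num_highly_relevant:
            rel_doc = 2
        elif rank <= num_highly_relevant + num_relevant:
            rel_doc = 1
        else:
            rel_doc = 0
        discount = 1 if rank == 1 else log(rank, 2)
        opt_dcg += rel_doc / discount
    return opt_dcg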
def searchUsers(self, pattern):
    # q.search(pattern) is expected to return the SQL for the user search
    self.cur.execute(q.search(pattern))
    columns = [desc[0] for desc in self.cur.description]
    users = list(self.cur.fetchall())
    # prepend the column names as a header row
    users.insert(0, columns)
    return users
import argparse
from os import walk
from os.path import abspath


def main():
    parser = argparse.ArgumentParser()
    # Currently only optional argument support (later add optional arguments
    # using subparsers)
    parser.add_argument("-f", "--filename",
                        help="you want to index a new file.")
    parser.add_argument("-d", "--dirname",
                        help="recursively add a tag and description to each "
                             "file in directory.")
    parser.add_argument("-st", "--searchtags",
                        help="search for a file based on tags",
                        action="store_true")
    parser.add_argument("-sd", "--searchdescription",
                        help="search for a file based on its description",
                        action="store_true")
    parser.add_argument("-sn", "--searchname",
                        help="search for a file based on its name",
                        action="store_true")
    args = parser.parse_args()
    # if a file name is provided then index this file
    if args.filename:
        name = args.filename
        tags, desc = checkValidity(name, 'file')
        if (tags, desc) == (None, None):
            return
        doc = {
            'filename': name,
            'filepath': abspath(name),
            'tags': tags,
            'description': desc
        }
        index(doc)
    # if a directory is provided then recursively walk the directory
    if args.dirname:
        dirname = args.dirname
        tags, desc = checkValidity(dirname, 'dir')
        if (tags, desc) == (None, None):
            return
        for root, dirs, files in walk(abspath(dirname)):
            for name in files:
                print('Now indexing file: ', root + '/' + name)
                doc = {
                    'filename': name,
                    'filepath': root + '/' + name,
                    'tags': tags,
                    'description': desc
                }
                index(doc)
    # if search on tags requested
    if args.searchtags:
        try:
            tags = input('tags to be searched [space separated]: ')
        except KeyboardInterrupt:
            print('\nExiting Gracefully...')
            return
        search('tags:' + tags)
    # if search on description requested
    if args.searchdescription:
        try:
            desc = input('rough description of file: ')
        except KeyboardInterrupt:
            print('\nExiting Gracefully...')
            return
        search('description:' + desc)
    # if search on name requested
    if args.searchname:
        try:
            name = input('filename to be searched: ')
        except KeyboardInterrupt:
            print('\nExiting Gracefully...')
            return
        search('filename:' + name)
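# Example invocations (the script name "tagger.py" and the file/directory
# arguments are placeholders, not from the original project):
#   python tagger.py -f notes.txt    # index a single file
#   python tagger.py -d ~/docs       # recursively index a directory
#   python tagger.py -st             # interactive search by tags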
elif op == 'exit':
    exit()
elif op == 'add':
    name = input('enter name: ')
    phone = input('enter phone: ')
    query.add(db, table_name, name=name, phone=phone)
elif op == 'delete':
    name = input('enter name: ')
    query.delete(db, table_name, name)
elif op == 'view':
    query.select(db, table_name)
elif op == 'edit':
    var = input('update name or phone: ')
    if var == 'name':
        phone = input('enter the phone no of which name to be updated: ')
        new_name = input('enter the new name: ')
        # set the new name on the record matched by phone number
        query.update(db, table_name, name=new_name, where_phone=phone)
    elif var == 'phone':
        name = input('enter the name whose number to be updated: ')
        new_phone = input('enter the new phone no: ')
        # set the new phone number on the record matched by name
        query.update(db, table_name, phone=new_phone, where_name=name)
elif op == 'search':
    att = input('search by name or phone: ')
    if att == 'name':
        name = input('enter the name: ')
        query.search(db, table_name, name=name)
    elif att == 'phone':
        num = input('enter the phone no: ')
        query.search(db, table_name, phone=num)
revs = [x for x in revs]
for rev in revs:
    texts.append(rev.getText().encode('utf-8'))
# cs = lcp.common_substring(texts, len(texts) * 0.9, 20)
# print(cs)
# missed = 0
for ((revPrev, timePrev), (revNext, timeNext)) in gen_prev_next(
        [(x, x.getTimestamp()) for x in revs], (None, int(time.time()))):
    query['q'] = ' '.join(['"%s"' % x
                           for x in escape_variables(revPrev.getText())])
    if options.namespace:
        query['q'] += ' namespace:%s' % options.namespace
    timePrev = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(timePrev))
    timeNext = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(timeNext))
    query['q'] += ' timestamp:[%s TO %s]' % (timePrev, timeNext)
    print(query, file=sys.stderr)
    res = query_func.search('localhost', 8080, query)
    print(res['hits_all'], file=sys.stderr)
    inject_result(result, res)
    # print(revPrev.getText())
    # print(revPrev.getId(), timePrev, timeNext - timePrev,
    #       revPrev.getText()[:20])
    # if revPrev.getText().find(cs.decode('utf-8')) < 0:
    #     print('missed', timeNext - timePrev, revPrev.getText())
    #     missed += timeNext - timePrev
    # else:
    #     print('ok')
# print(missed)
except Exception as e:
    traceback.print_exc(file=sys.stderr)
# restore the structure of result
if options.revisions:
from query import search
from time import time as t

title_map = {}
with open("mapping.txt", encoding="utf8") as f:
    for line in f:
        parts = line.split()
        doc_id = int(parts[0])
        if len(parts) > 1:
            title = " ".join(parts[1:])
        else:
            title = "id:" + parts[0] + ". Filename not found"
        title_map[doc_id] = title

if __name__ == "__main__":
    print("Enter query to search and type '.' to end the loop")
    while True:
        string = input("Enter your query : ")
        if string.strip() == ".":
            break
        start = t()
        doc_list = search(string)
        if not doc_list:
            print("No results found")
        for k in doc_list:
            if k in title_map:
                print(title_map[k])
        print("Time taken:", t() - start)
def api_search(query, banyak):
    # banyak (Indonesian for "many") is the number of results to return;
    # it arrives as a string from the request layer, hence the int() cast
    return search(query, int(banyak))
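# Hypothetical call (the argument values are illustrative):
# api_search("informatika", "10")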
def search():
    args = request.values
    name = args.get('name', '')
    pageNum = int(args.get('pageNum', '1'))
    pageSize = int(args.get('pageSize', '10'))
    return query.search(name, pageNum, pageSize)
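# Hypothetical request hitting this handler (parameter names come from the
# code above; the route path is an assumption):
# GET /search?name=alice&pageNum=2&pageSize=10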