def main():
    """Run the queries in a file against a prebuilt index.

    The dictionary (-d), postings (-p), queries (-q) and output (-o) file
    paths are read from the command-line flags. If any of them is missing,
    usage() is called and the process exits with status 2. When the -time
    flag is present, the elapsed time of the search call is printed.
    """
    dictionary_file = get_flag_value('-d')
    postings_file = get_flag_value('-p')
    queries_file = get_flag_value('-q')
    output_file = get_flag_value('-o')

    required = (dictionary_file, postings_file, queries_file, output_file)
    # Identity comparison is the correct test for a missing flag value.
    if any(value is None for value in required):
        usage()
        sys.exit(2)

    # Imported here (after flag validation and before the timer starts), so
    # the import cost is not included in the reported time.
    import search_index

    # Read the flag once so the start/stop branches can never disagree.
    timed = has_flag('-time')
    if timed:
        start = time.time()

    search_index.search(
        dictionary_file, postings_file, queries_file, output_file)

    if timed:
        time_taken = time.time() - start
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('%s: Time taken = %.2f secs' % (queries_file, time_taken))
def _jsonp_bad_request(message):
    """Build a 400 JSON response carrying an error message."""
    return Response(json.dumps({'success': False, 'error': message}),
                    status=400,
                    content_type='application/json; charset=utf-8')


def search():
    """
    Search for food truck locations given a query and a rectangular area
    defined by coordinates of the south west and north east corners.

    Request arguments:
        query: String to search for
        southWestLat: Latitude of the south west corner
        southWestLng: Longitude of the south west corner
        northEastLat: Latitude of the north east corner
        northEastLng: Longitude of the north east corner
        jsoncallback: Name of the JSONP callback function wrapper

    :return: JSONP encoded result in the following format:
        jsoncallback({'success': true,
                      'data': [ <food_truck_data_1>, <food_truck_data_2> ]})
        Where food_truck_data is a JSON object with info fields described in
        http://data.sfgov.org/resource/rqzj-sfat.json

        A 400 response with a JSON body {'success': false, 'error': ...} is
        returned when jsoncallback is missing or is not a plain (optionally
        dotted) JavaScript identifier, or when any coordinate is missing or
        not a number.
    """
    # Function-scope import keeps this block self-contained.
    import re

    logging.debug("Incoming %s", request.args)

    # The callback name is echoed into executable JavaScript, so it must be
    # restricted to identifier characters to prevent script injection.
    callback_name = request.args.get('jsoncallback')
    identifier = r'[A-Za-z_$][0-9A-Za-z_$]*'
    callback_pattern = r'^%s(?:\.%s)*\Z' % (identifier, identifier)
    if callback_name is None or not re.match(callback_pattern, callback_name):
        return _jsonp_bad_request('Invalid or missing jsoncallback parameter')

    # float(None) raises TypeError and float('abc') raises ValueError; both
    # mean the client sent a bad bounding box rather than a server failure.
    coordinate_names = ('southWestLat', 'southWestLng',
                        'northEastLat', 'northEastLng')
    try:
        south_west_lat, south_west_lng, north_east_lat, north_east_lng = [
            float(request.args.get(name)) for name in coordinate_names]
    except (TypeError, ValueError):
        return _jsonp_bad_request('Invalid or missing coordinate parameter')

    results = search_index.search(request.args.get('query'),
                                  south_west_lat, south_west_lng,
                                  north_east_lat, north_east_lng)

    payload = json.dumps({'success': True, 'data': results})
    return Response('%s(%s)' % (callback_name, payload),
                    content_type='application/javascript; charset=utf-8')
def search_book(bookname, search_term=None):
    """ Search for a pattern inside a book.

    Aborts with 404 when `bookname` is not an entry of BOOK_PATH, and with
    400 when `search_term` is empty or when a `callback` request argument is
    given that is not a plain (optionally dotted) JavaScript identifier.

    :param bookname: name of the book; must be listed in BOOK_PATH
    :param search_term: the text to search for
    :return: a JSON response of the form
        {'q': search_term, 'ia': bookname, 'matches': [...]}, where each
        match carries the hit's snippet as 'text' and one 'par' entry with
        the hit's page number and its highlight boxes (l/t/r/b taken from
        box indices 0-3). When a `callback` argument is present the same
        JSON is wrapped as `callback(...)` and served as
        application/javascript.
    """
    # Function-scope import keeps this block self-contained.
    import re

    if bookname not in os.listdir(BOOK_PATH):
        abort(404)
    if not search_term:
        abort(400)

    # The callback name ends up inside executable JavaScript, so reject
    # anything that is not an identifier before doing any work.
    callback = request.args.get('callback', False)
    if callback:
        identifier = r'[A-Za-z_$][0-9A-Za-z_$]*'
        if not re.match(r'^%s(?:\.%s)*\Z' % (identifier, identifier),
                        callback):
            abort(400)

    # TODO: Verify that the book has indeed been indexed
    results = search_index.search(search_term, bookname=bookname)
    out_dict = {
        'q': search_term,
        'ia': bookname,
        'matches': [{
            'text': hit['snippet'],
            'par': [{
                'boxes': [{
                    'l': box[0], 't': box[1], 'r': box[2], 'b': box[3],
                    'page': hit['pagenum'],
                } for box in hit['highlights']],
                'page': hit['pagenum'],
            }],
        } for hit in results],
    }

    if not callback:
        return jsonify(out_dict)

    data = jsonify(out_dict).data
    # On Python 3 `.data` is bytes and str() of it would yield "b'...'";
    # decode explicitly. On Python 2 it is already a str and is left alone.
    if not isinstance(data, str):
        data = data.decode('utf-8')
    content = str(callback) + "({0})".format(data)
    mimetype = "application/javascript"
    return current_app.response_class(content, mimetype=mimetype)
def main():
    """Command-line entry point: search an index with a file of queries.

    Reads four file paths from the flags -d (dictionary), -p (postings),
    -q (queries) and -o (output). Calls usage() and exits with status 2 if
    any of them was not supplied. With the -time flag, prints how long the
    search_index.search call took.
    """
    dictionary_file = get_flag_value('-d')
    postings_file = get_flag_value('-p')
    queries_file = get_flag_value('-q')
    output_file = get_flag_value('-o')

    # `is None` (identity) rather than `== None` to detect a missing flag.
    if (dictionary_file is None or postings_file is None or
            queries_file is None or output_file is None):
        usage()
        sys.exit(2)

    # Deferred import: only reached once all flags are valid, and done
    # before the timer starts so it does not count towards the timing.
    import search_index

    # Evaluate the flag a single time; `start` is then guaranteed to exist
    # whenever the elapsed time is computed below.
    show_timing = has_flag('-time')
    if show_timing:
        start = time.time()

    search_index.search(dictionary_file, postings_file,
                        queries_file, output_file)

    if show_timing:
        time_taken = time.time() - start
        # Parenthesised single argument prints the same on Python 2 and 3.
        print('%s: Time taken = %.2f secs' % (queries_file, time_taken))
def search_book(bookname, search_term=None):
    """ Search for a pattern inside a book.

    Error handling:
        404 - `bookname` is not an entry of the BOOK_PATH directory
        400 - `search_term` is empty, or the optional `callback` request
              argument is not a plain (optionally dotted) JavaScript
              identifier

    :param bookname: name of the book to search; must be listed in BOOK_PATH
    :param search_term: the text to look for
    :return: JSON {'q': ..., 'ia': ..., 'matches': [...]}; every match holds
        the hit snippet under 'text' and a single-element 'par' list with
        the hit's page number and highlight boxes (box indices 0-3 exposed
        as l, t, r, b). If `callback` is supplied, the JSON is wrapped in a
        call to it and returned as application/javascript (JSONP).
    """
    # Function-scope import keeps this block self-contained.
    import re

    if bookname not in os.listdir(BOOK_PATH):
        abort(404)
    if not search_term:
        abort(400)

    callback = request.args.get('callback', False)
    # A JSONP callback is emitted verbatim into a script; allow identifier
    # characters only so a request cannot inject arbitrary JavaScript.
    name_part = r'[A-Za-z_$][0-9A-Za-z_$]*'
    if callback and not re.match(
            r'^%s(?:\.%s)*\Z' % (name_part, name_part), callback):
        abort(400)

    # TODO: Verify that the book has indeed been indexed
    results = search_index.search(search_term, bookname=bookname)

    matches = []
    for hit in results:
        page = hit['pagenum']
        boxes = [
            {'l': box[0], 't': box[1], 'r': box[2], 'b': box[3], 'page': page}
            for box in hit['highlights']
        ]
        matches.append({
            'text': hit['snippet'],
            'par': [{'boxes': boxes, 'page': page}],
        })
    out_dict = {'q': search_term, 'ia': bookname, 'matches': matches}

    if callback:
        data = jsonify(out_dict).data
        # `.data` is bytes on Python 3, where str(bytes) would produce the
        # "b'...'" repr; decode it instead. Python 2 str passes through.
        if not isinstance(data, str):
            data = data.decode('utf-8')
        content = str(callback) + "({0})".format(data)
        mimetype = "application/javascript"
        return current_app.response_class(content, mimetype=mimetype)
    else:
        return jsonify(out_dict)
# Invert the per-URL word counts: once the loops finish, every word is a key
# whose value is a {url: tf} mapping.
dict_of_token_frequency = {}
for url in dict_of_dicts:
    per_url_counts = dict_of_dicts[url]
    for word in per_url_counts:
        dict_of_token_frequency.setdefault(word, {})[url] = per_url_counts[word]

# Build the inverted index from the word -> {url: tf} table and the number of
# files reported by the driver.
index_builder = build_index.IndexBuilder()
final_dict = index_builder.build_inverted_index(
    dict_of_token_frequency, driver.file_count)

# Every value is stored as its UTF-8 encoded text form before insertion.
final_dict = {
    key: unicode(value).encode("utf-8")
    for key, value in final_dict.iteritems()
}
record1 = my_database.inverted_index_table.insert_one(final_dict)

# Debug aid (disabled): dump every stored record.
# cursor = my_collection.find()
# for record in cursor:
#     print(record)
#     print("\n")

# Read a query, derive its trimmed form, and run it against the collection.
url_dict = driver.url_dict
query_obj = get_input.query()
query_token = query_obj.get_query()
trimmed_query = query_obj.trim_query(query_token)
search_token = query_obj.search_query(query_token)

result_list = search_index.search(trimmed_query, my_collection, url_dict)
for link in result_list:
    print(link + "\n")
def test_case_search5(self):
    """Searching for 'cheese' with these bounds must return all_results[1:2].

    The four numeric arguments are presumably the south-west and north-east
    corner coordinates of the search area (lat, lng, lat, lng).
    """
    expected_ids = [entry['objectid']
                    for entry in search_index.all_results[1:2]]

    hits = search_index.search(
        'cheese',
        37.7860914634251, -122.398658184604,
        37.7901490737255, -122.3934729318)
    found_ids = [hit['objectid'] for hit in hits]

    assert_equals(found_ids, expected_ids)
def test_case_search3(self):
    """An empty query with these bounds must return all_results[0:3].

    The four numeric arguments are presumably the south-west and north-east
    corner coordinates of the search area (lat, lng, lat, lng).
    """
    expected_ids = [entry['objectid']
                    for entry in search_index.all_results[0:3]]

    hits = search_index.search(
        '',
        37.787, -122.398658184604,
        37.7901490737255, -122.394)
    found_ids = [hit['objectid'] for hit in hits]

    assert_equals(found_ids, expected_ids)