Ejemplo n.º 1
0
def main():
    """Read the command-line flags and run the index search.

    Four flag values are fetched with ``get_flag_value``: ``-d``
    (dictionary file), ``-p`` (postings file), ``-q`` (queries file) and
    ``-o`` (output file).  If any of them is ``None``, ``usage()`` is
    called and the process exits with status 2.

    When the ``-time`` flag is present, the wall-clock duration of the
    search call is printed after it completes.
    """
    dictionary_file = get_flag_value('-d')
    postings_file = get_flag_value('-p')
    queries_file = get_flag_value('-q')
    output_file = get_flag_value('-o')

    # Identity comparison: ``== None`` can be fooled by objects that
    # override ``__eq__``.
    required = (dictionary_file, postings_file, queries_file, output_file)
    if any(value is None for value in required):
        usage()
        sys.exit(2)

    # Kept as a function-level import, at the same point as before: it only
    # runs once the arguments have been validated.
    import search_index

    # Query the flag once so ``start`` is guaranteed to be bound whenever
    # the elapsed time is reported below.
    timed = has_flag('-time')
    if timed:
        start = time.time()

    search_index.search(
        dictionary_file,
        postings_file,
        queries_file,
        output_file)

    if timed:
        time_taken = time.time() - start
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('%s: Time taken = %.2f secs' % (queries_file, time_taken))
Ejemplo n.º 2
0
def search():
    """
    Search for food truck locations given a query and a rectangular area defined by coordinates of the south west
    and north east corners:
    query: String to search for
    southWestLat: Latitude of the south west corner
    southWestLng: Longitude of the south west corner
    northEastLat: Latitude of the north east corner
    northEastLng: Longitude of the north east corner
    jsoncallback: Name of the JSONP callback function wrapper

    :return: JSONP encoded result in the following format:
    jsoncallback({'success': true, 'data': [
    <food_truck_data_1>,
    <food_truck_data_2>
    ]})

    Where food_truck_data is a JSON object with info fields described in http://data.sfgov.org/resource/rqzj-sfat.json

    A 400 response with a JSON body ``{'success': false, 'error': ...}`` is returned when ``jsoncallback`` is
    missing or is not a plain (optionally dotted) JavaScript identifier, or when any coordinate is missing or
    not a number.
    """
    import re

    def bad_request(message):
        # Plain JSON (not JSONP): the callback name may be the invalid part.
        return Response(json.dumps({'success': False, 'error': message}),
                        status=400,
                        content_type='application/json; charset=utf-8')

    # Lazy %-formatting: the message is only built if DEBUG is enabled.
    logging.debug("Incoming %s", request.args)

    # The callback name is echoed verbatim into a JavaScript response, so it
    # must be restricted to an identifier to prevent script injection.
    callback_name = request.args.get('jsoncallback')
    identifier = r'[A-Za-z_$][A-Za-z0-9_$]*'
    if not callback_name or not re.match(
            r'%s(?:\.%s)*\Z' % (identifier, identifier), callback_name):
        return bad_request('missing or invalid jsoncallback')

    # float(None) raises TypeError and float('abc') raises ValueError; both
    # are client errors rather than server failures.
    try:
        bounds = [float(request.args.get(name))
                  for name in ('southWestLat', 'southWestLng',
                               'northEastLat', 'northEastLng')]
    except (TypeError, ValueError):
        return bad_request('missing or non-numeric coordinate')

    results = search_index.search(request.args.get('query'), *bounds)
    payload = json.dumps({'success': True, 'data': results})
    return Response('%s(%s)' % (callback_name, payload),
                    content_type='application/javascript; charset=utf-8')
Ejemplo n.º 3
0
def search_book(bookname, search_term=None):
    """Search for a pattern inside a book.

    Aborts with 404 when ``bookname`` is not an entry of ``BOOK_PATH`` and
    with 400 when ``search_term`` is empty.  Otherwise the hits returned by
    ``search_index.search`` are reshaped into a dict with the keys ``q``,
    ``ia`` and ``matches``.

    If the request carries a ``callback`` query argument, the JSON is
    wrapped as ``callback(<json>)`` and served as JavaScript (JSONP);
    a callback that is not a plain, optionally dotted, identifier aborts
    with 400.  Without a callback the plain ``jsonify`` response is
    returned.
    """
    import re

    if bookname not in os.listdir(BOOK_PATH):
        abort(404)
    if not search_term:
        abort(400)
    # TODO: Verify that the book has indeed been indexed
    results = search_index.search(search_term, bookname=bookname)
    out_dict = {
        'q': search_term,
        'ia': bookname,
        'matches': [{'text': hit['snippet'],
                     'par': [{
                         'boxes': [
                             {'l': box[0], 't': box[1], 'r': box[2],
                              'b': box[3], 'page': hit['pagenum']}
                             for box in hit['highlights']],
                         'page': hit['pagenum']}]} for hit in results]
    }
    callback = request.args.get('callback', False)
    if not callback:
        return jsonify(out_dict)

    # The callback is echoed into executable JavaScript, so only accept an
    # identifier (optionally dotted) to prevent script injection.
    identifier = r'[A-Za-z_$][A-Za-z0-9_$]*'
    if not re.match(r'%s(?:\.%s)*\Z' % (identifier, identifier), callback):
        abort(400)

    # ``.data`` is bytes on Python 3, where str(bytes) would yield "b'...'"
    # and corrupt the payload; ask for decoded text instead.
    data = jsonify(out_dict).get_data(as_text=True)
    content = u"{0}({1})".format(callback, data)
    mimetype = "application/javascript"
    return current_app.response_class(content, mimetype=mimetype)
Ejemplo n.º 4
0
def main():
    """Entry point: validate the required flags, then search the index.

    The values of ``-d`` (dictionary file), ``-p`` (postings file), ``-q``
    (queries file) and ``-o`` (output file) are read via
    ``get_flag_value``.  Should any of them be ``None``, ``usage()`` is
    invoked and the process exits with status 2.

    With the ``-time`` flag present, the elapsed wall-clock time of the
    search call is printed once it returns.
    """
    dictionary_file = get_flag_value('-d')
    postings_file = get_flag_value('-p')
    queries_file = get_flag_value('-q')
    output_file = get_flag_value('-o')

    # ``is None`` rather than ``== None``: identity cannot be overridden by
    # a custom ``__eq__``.
    if (dictionary_file is None or postings_file is None
            or queries_file is None or output_file is None):
        usage()
        sys.exit(2)

    # Function-level import retained from the original; it executes only
    # after the flags have been validated.
    import search_index

    # Evaluate the flag a single time so the start timestamp and the report
    # below can never disagree about whether timing is enabled.
    report_time = has_flag('-time')
    start = time.time() if report_time else None

    search_index.search(dictionary_file, postings_file, queries_file,
                        output_file)

    if report_time:
        time_taken = time.time() - start
        # Parenthesised single-argument form: same output on Python 2 and 3.
        print('%s: Time taken = %.2f secs' % (queries_file, time_taken))
Ejemplo n.º 5
0
def search_book(bookname, search_term=None):
    """Search for a pattern inside a book.

    Responds with 404 if ``bookname`` is not listed in ``BOOK_PATH`` and
    with 400 if ``search_term`` is empty.  The hits from
    ``search_index.search`` are converted into a dict holding the query
    (``q``), the book name (``ia``) and a ``matches`` list in which every
    hit contributes its snippet text plus one paragraph entry with the
    highlight boxes and page number.

    When a ``callback`` query argument is supplied the JSON is returned as
    JSONP (``callback(<json>)``, served as JavaScript).  The callback must
    be a plain, optionally dotted, identifier; anything else aborts with
    400.  Without a callback the regular ``jsonify`` response is returned.
    """
    import re

    if bookname not in os.listdir(BOOK_PATH):
        abort(404)
    if not search_term:
        abort(400)
    # TODO: Verify that the book has indeed been indexed
    results = search_index.search(search_term, bookname=bookname)
    out_dict = {
        'q':
        search_term,
        'ia':
        bookname,
        'matches': [{
            'text':
            hit['snippet'],
            'par': [{
                'boxes': [{
                    'l': box[0],
                    't': box[1],
                    'r': box[2],
                    'b': box[3],
                    'page': hit['pagenum']
                } for box in hit['highlights']],
                'page':
                hit['pagenum']
            }]
        } for hit in results]
    }
    callback = request.args.get('callback', False)
    if callback:
        # The callback name ends up inside executable JavaScript: restrict
        # it to an identifier so a request cannot inject arbitrary script.
        name = r'[A-Za-z_$][A-Za-z0-9_$]*'
        if not re.match(r'%s(?:\.%s)*\Z' % (name, name), callback):
            abort(400)
        # Request decoded text: on Python 3 ``.data`` is bytes and
        # str(bytes) would produce "b'...'" instead of the JSON document.
        data = jsonify(out_dict).get_data(as_text=True)
        content = u"{0}({1})".format(callback, data)
        mimetype = "application/javascript"
        return current_app.response_class(content, mimetype=mimetype)
    else:
        return jsonify(out_dict)
Ejemplo n.º 6
0
        # Invert the nested mapping: dict_of_dicts is keyed by url, then by
        # word; the result is keyed by word, then by url, carrying over the
        # same stored value (presumably a term frequency -- confirm upstream).
        dict_of_token_frequency = {} # after construction, this contains the word as a key and the url:tf as a kv pair
        for url in dict_of_dicts:
            for word in dict_of_dicts[url]:
                if word not in dict_of_token_frequency:
                    # First time this word is seen: start its per-url dict.
                    dict_of_token_frequency[word] = {url:dict_of_dicts[url][word]}
                else:
                    # Word already known: add (or overwrite) this url's entry.
                    dict_of_token_frequency[word].update({url:dict_of_dicts[url][word]})
        #print(dict_of_token_frequency)

        # Build the inverted index from the per-word mapping and the file
        # count reported by the driver.
        index_builder = build_index.IndexBuilder()
        final_dict = index_builder.build_inverted_index(dict_of_token_frequency, driver.file_count)

        # Stringify every value and encode it as UTF-8 before storing it.
        # Python 2 only: relies on `unicode` and `dict.iteritems`.
        final_dict = {k: unicode(v).encode("utf-8") for k,v in final_dict.iteritems()}
        # The whole index is inserted as a single record.
        # NOTE(review): record1 is not used in the visible code -- confirm
        # whether the insert result is needed.
        record1 = my_database.inverted_index_table.insert_one(final_dict)

        #cursor = my_collection.find()
        '''
        for record in cursor:
                print(record)
                print("\n")
        '''
        # Obtain the query from get_input and run it against the index.
        url_dict = driver.url_dict
        query_obj = get_input.query()
        query_token = query_obj.get_query()

        trimmed_query = query_obj.trim_query(query_token)
        # NOTE(review): search_token is not used in the visible code; only
        # trimmed_query is passed to the search below -- confirm intent.
        search_token = query_obj.search_query(query_token)
        # NOTE(review): the insert above targets
        # my_database.inverted_index_table while the search reads
        # my_collection -- presumably the same collection; verify.
        result_list = search_index.search(trimmed_query, my_collection, url_dict)
        # Print each result followed by an extra blank line.
        for link in result_list:
                print(link + "\n")
        
Ejemplo n.º 7
0
 def test_case_search5(self):
     # Expected outcome: the objectids of the fixture slice all_results[1:2].
     expected_ids = [entry['objectid']
                     for entry in search_index.all_results[1:2]]
     # The four numeric search arguments, in call order.
     bounds = (37.7860914634251, -122.398658184604,
               37.7901490737255, -122.3934729318)
     hits = search_index.search('cheese', *bounds)
     found_ids = [entry['objectid'] for entry in hits]
     assert_equals(found_ids, expected_ids)
Ejemplo n.º 8
0
 def test_case_search3(self):
     # Expected outcome: the objectids of the fixture slice all_results[0:3].
     expected_ids = [entry['objectid']
                     for entry in search_index.all_results[0:3]]
     # The four numeric search arguments, in call order; the query is empty.
     bounds = (37.787, -122.398658184604, 37.7901490737255, -122.394)
     hits = search_index.search('', *bounds)
     found_ids = [entry['objectid'] for entry in hits]
     assert_equals(found_ids, expected_ids)