Beispiel #1
0
    # Close the desktop/mobile handles opened earlier (outside this excerpt);
    # the loop below re-opens the output files in append mode per query.
    output_desktop.close()
    output_mobile.close()

    import time
    # Reference point for the elapsed-time figure printed with every query.
    t0 = time.time()
    for query in queries:
        # Progress trace: the query record and the seconds elapsed since t0.
        print query, time.time() - t0
        query_id = query['id']
        query_str = query['query']

        # Each query gets its own scratch folder below base_tmp, handed to the
        # search through ini['tmp_folder'].
        # NOTE(review): os.mkdir raises OSError if the folder already exists,
        # e.g. on a re-run with the same base_tmp - confirm this is intended.
        tmp = "%s/%s" % (base_tmp, query_id)
        os.mkdir(tmp)
        ini['tmp_folder'] = tmp

        # Register the query's pages only when find_query has no entry for it
        # under the 'NTCIR' label, then commit that registration.
        if web_search.find_query(cursor, query_str, 'NTCIR') is None:
            register_query_pages(query_id, query_str, cursor, ntcir_urls_folder, ntcir_htmls_folder, cache_folder)
            conn.commit()

        # One search call produces both variants; results is indexed by the
        # 'DESKTOP' / 'MOBILE' labels passed in here.
        # NOTE(review): 1000 and 280 are presumably output length limits -
        # confirm against one_click_search.
        (results, html_urls) = one_click_search(ini, query_str, [(1000, 'DESKTOP'), (280, 'MOBILE')])
        for (output_file, result) in [(output_file_desktop, results['DESKTOP']), (output_file_mobile, results['MOBILE'])]:
            # NOTE(review): no close() for this handle is visible in this
            # excerpt - confirm it is closed after the evidence loop.
            output = file(output_file, 'a')
            # OUT line: result[0] with every newline replaced by a space so
            # the record stays on a single tab-separated line.
            output.write('%s\tOUT\t%s\n' % (query_id, re.sub('\n', ' ', result[0])))
            # Evidence ids already written for this result, so each one yields
            # at most one SOURCE line.
            printed = set()
            for evidence in result[1]:
                #TODO evidence id to page
                if not evidence in printed:
                    # The evidence id is used as an integer index into html_urls.
                    url = html_urls[int(evidence)]
                    # Keep only what follows the last '/' of the URL.
                    url = re.sub('.*/', '', url)
                    output.write('%s\tSOURCE\t%s\n' % (query_id, url))
                    printed.add(evidence)
import web_search

if __name__ == "__main__":
    cache_folder = sys.argv[1]
    search_engine = sys.argv[2]
    html_file = sys.argv[3]
    query_str = " ".join(sys.argv[4:])

    print "cache folder", cache_folder
    print "search engine", search_engine
    print "urls", html_file
    print "query", query_str

    (conn, cursor) = web_search.open_db(cache_folder)

    if web_search.find_query(cursor, query_str, search_engine) is not None:
        sys.exit("Query already in index")

    cache_files_folder = "%s/pages" % (cache_folder,)
    print "page cache folder", cache_files_folder
    try:
        os.mkdir(cache_files_folder)
        print "creating page cache folder", cache_files_folder
    except object as exc:
        print "(warning) problem creating", cache_files_folder, exc
    except OSError:
        pass

    qid = web_search.add_query(cursor, query_str, search_engine)

    rank = 0
Beispiel #3
0
import web_search

if __name__ == '__main__':
    cache_folder = sys.argv[1]
    search_engine = sys.argv[2]
    html_file = sys.argv[3]
    query_str = " ".join(sys.argv[4:])

    print "cache folder", cache_folder
    print "search engine", search_engine
    print "urls", html_file
    print "query", query_str

    (conn, cursor) = web_search.open_db(cache_folder)

    if web_search.find_query(cursor, query_str, search_engine) is not None:
        sys.exit("Query already in index")

    cache_files_folder = "%s/pages" % (cache_folder, )
    print "page cache folder", cache_files_folder
    try:
        os.mkdir(cache_files_folder)
        print "creating page cache folder", cache_files_folder
    except object as exc:
        print "(warning) problem creating", cache_files_folder, exc
    except OSError:
        pass

    qid = web_search.add_query(cursor, query_str, search_engine)

    rank = 0
Beispiel #4
0
    # Close the desktop/mobile handles opened earlier (outside this excerpt);
    # the loop below re-opens the output files in append mode per query.
    output_desktop.close()
    output_mobile.close()

    import time
    # Reference point for the elapsed-time figure printed with every query.
    t0 = time.time()
    for query in queries:
        # Progress trace: the query record and the seconds elapsed since t0.
        print query, time.time() - t0
        query_id = query['id']
        query_str = query['query']

        # Each query gets its own scratch folder below base_tmp, handed to the
        # search through ini['tmp_folder'].
        # NOTE(review): os.mkdir raises OSError if the folder already exists,
        # e.g. on a re-run with the same base_tmp - confirm this is intended.
        tmp = "%s/%s" % (base_tmp, query_id)
        os.mkdir(tmp)
        ini['tmp_folder'] = tmp

        # Register the query's pages only when find_query has no entry for it
        # under the 'NTCIR' label, then commit that registration.
        if web_search.find_query(cursor, query_str, 'NTCIR') is None:
            register_query_pages(query_id, query_str, cursor,
                                 ntcir_urls_folder, ntcir_htmls_folder,
                                 cache_folder)
            conn.commit()

        # One search call produces both variants; results is indexed by the
        # 'DESKTOP' / 'MOBILE' labels passed in here.
        # NOTE(review): 1000 and 280 are presumably output length limits -
        # confirm against one_click_search.
        (results, html_urls) = one_click_search(ini, query_str,
                                                [(1000, 'DESKTOP'),
                                                 (280, 'MOBILE')])
        for (output_file, result) in [(output_file_desktop,
                                       results['DESKTOP']),
                                      (output_file_mobile, results['MOBILE'])]:
            # NOTE(review): no close() for this handle is visible in this
            # excerpt - confirm it is closed once the result is written.
            output = file(output_file, 'a')
            # OUT line: result[0] with every newline replaced by a space so
            # the record stays on a single tab-separated line.
            output.write('%s\tOUT\t%s\n' %
                         (query_id, re.sub('\n', ' ', result[0])))
            # Fresh, empty set for each result; its use lies beyond the end of
            # this excerpt.
            printed = set()