        with open(query_file, encoding='utf-8') as file:
            parsed = BeautifulSoup(file, "html.parser")

        query_list = parsed.find_all("topic")
        print("There are %s queries." % len(query_list))
        combined_stats = Stats()
        fileids = set()

        ##          try:
        query_list_m = list(map(get_query, query_list))  # whole batch for now
        args = [(query_list_m, topk, math_index)]

        for p in args:  # single-process execution
            (fileid, stats) = process_query_batch(p)
            fileids.add(fileid)
            combined_stats.add(stats)
##          except Exception as err:
##              reason = str(err)
##              print("Failed to process queries: "+reason, file=sys.stderr)

        cntl.store("query_fileids", str(fileids))

        print("Done preparing query batch for %s" % (query_file))
        combined_stats.dump()

        cntl.dump()  # output the revised cntl file

        end = time.time()
        elapsed = end - start

        print("Elapsed time %s" % (elapsed))
Example #2
            with open(query_file, encoding='utf-8') as file:
                parsed = BeautifulSoup(file, "lxml")

            query_list = parsed.find_all("topic")
            print("There are %s queries." % (len(query_list)), flush=True)
            combined_stats = Stats()
            fileids = set()

            try:
                query_list_m = list(map(get_query, query_list))  # whole batch for now
                args = [(system, db, run_tag, query_list_m, topk, math_index, weighting_strategy)]

                for p in args:  # single-process execution
                    (fileid, stats) = process_query_batch(p)
                    fileids.add(fileid)
                    combined_stats.add(stats)
            except Exception as err:
                reason = str(err)
                print("Failed to process queries from "+query_file+": "+reason, file=sys.stderr)
                combined_stats.problem_files[reason] = combined_stats.problem_files.get(reason, set())
                combined_stats.problem_files[reason].add(query_file)

            cntl.store("query_fileids",str(fileids))
            
            print("Done preparing query batch for %s against %s" % (query_file, db))
            combined_stats.dump()

            cntl.dump()  # output the revised cntl file

            end = time.time()
            elapsed = end - start