Example #1
0
    # Open the run file once; results are written to it as each query (or
    # batch of queries) completes.
    with open(output_path, 'w') as target_file:
        # Accumulators for the batched path: query texts and their ids are
        # appended together so the two lists stay in lockstep.
        batch_topics = list()
        batch_topic_ids = list()
        # The iterator is materialized into a list before being handed to tqdm.
        for index, (topic_id, text) in enumerate(
                tqdm(list(query_iterator(topics, order)))):
            if args.batch_size <= 1 and args.threads <= 1:
                # No batching and no extra threads: search this query directly
                # and wrap it as a one-element result list.
                hits = searcher.search(text, args.hits)
                results = [(topic_id, hits)]
            else:
                # Note the id is stringified here, but not in the branch above.
                batch_topic_ids.append(str(topic_id))
                batch_topics.append(text)
                # Flush when the batch is full or when this is the last index.
                # NOTE(review): "last" is judged against len(topics.keys()),
                # which presumes query_iterator yields one item per topic --
                # confirm. A batch_size of 0 reaching this line would raise
                # ZeroDivisionError.
                if (index + 1) % args.batch_size == 0 or \
                        index == len(topics.keys()) - 1:
                    results = searcher.batch_search(batch_topics,
                                                    batch_topic_ids, args.hits,
                                                    args.threads)
                    # Re-shape the id-indexed result into (id, hits) pairs in
                    # the order the queries were queued.
                    results = [(id_, results[id_]) for id_ in batch_topic_ids]
                    # Reset the accumulators for the next batch.
                    batch_topic_ids.clear()
                    batch_topics.clear()
                else:
                    # Batch not full yet: keep accumulating, write nothing.
                    continue

            # Write every (topic id, hits) pair produced in this iteration.
            for result in results:
                if args.max_passage:
                    # Max-passage mode uses its own writer, delimiter and hit
                    # count.
                    write_result_max_passage(target_file, result,
                                             args.max_passage_delimiter,
                                             args.max_passage_hits,
                                             args.msmarco, tag)
                else:
                    # Default writer (the call continues past this excerpt).
                    write_result(target_file, result, args.hits, args.msmarco,
Example #2
0
# Resolve where the run file is written. `tag` is the run label placed in the
# last column of every TREC-format line.
output_path = args.output

print(f'Running {args.topics} topics, saving to {output_path}...')
tag = 'Faiss'

# Batched retrieval: send queries to the searcher in groups of `args.batch`,
# write the complete run file, then terminate so that no later code in this
# script runs.
if args.batch > 1:
    with open(output_path, 'w') as target_file:
        # Topics are processed in sorted key order.
        topic_keys = sorted(topics.keys())
        for i in tqdm(range(0, len(topic_keys), args.batch)):
            topic_key_batch = topic_keys[i:i + args.batch]
            # The query text is each topic's 'title' field, whitespace-stripped.
            topic_batch = [
                topics[topic].get('title').strip() for topic in topic_key_batch
            ]
            hits = searcher.batch_search(topic_batch,
                                         topic_key_batch,
                                         k=args.hits,
                                         threads=args.threads)
            for topic in hits:
                # Ranks are 1-based in both output formats.
                for rank, hit in enumerate(hits[topic], start=1):
                    if args.msmarco:
                        # MS MARCO format: topic, docid, rank (tab-separated).
                        target_file.write(f'{topic}\t{hit.docid}\t{rank}\n')
                    else:
                        # TREC format: topic Q0 docid rank score tag.
                        target_file.write(
                            f'{topic} Q0 {hit.docid} {rank} {hit.score:.6f} {tag}\n'
                        )
    # Raise SystemExit directly instead of calling exit(): that helper is
    # installed by the `site` module for interactive use and is not
    # guaranteed to exist (e.g. under `python -S` or in frozen builds).
    raise SystemExit(0)

# One search call per topic: open the run file for writing and visit the
# topics in sorted key order.
# NOTE(review): the code that consumes `hits` is not visible in this excerpt.
with open(output_path, 'w') as target_file:
    for index, topic in enumerate(tqdm(sorted(topics.keys()))):
        # The query text is the topic's 'title' field with surrounding
        # whitespace stripped.
        search = topics[topic].get('title').strip()
        hits = searcher.search(search, args.hits, threads=args.threads)