def main():
    """Command-line entry point: encode a collection with ColBERT into an index.

    Parses the command line, resolves ``--index`` relative to ``--output_dir``
    and ``--collection`` relative to ``--data_dir``, loads the checkpoint with
    ``load_colbert`` and passes the populated namespace to ``encode``.
    """
    random.seed(123456)

    # The previous description was copied from the re-ranking evaluation
    # script and misdescribed this tool in ``--help``.
    parser = ArgumentParser(
        description='Encode a collection with ColBERT into an index.')

    parser.add_argument('--index', dest='index', required=True)
    parser.add_argument('--checkpoint', dest='checkpoint', required=True)
    parser.add_argument('--collection', dest='collection', default='collection.tsv')

    parser.add_argument('--data_dir', dest='data_dir', default=DEFAULT_DATA_DIR)
    parser.add_argument('--output_dir', dest='output_dir', default='outputs.index/')

    parser.add_argument('--bsize', dest='bsize', default=128, type=int)
    parser.add_argument('--bytes', dest='bytes', default=2, choices=[2, 4], type=int)
    parser.add_argument('--subsample', dest='subsample', default=None)  # TODO: Add this

    # TODO: For the following four arguments, default should be None. If None, they should be loaded from checkpoint.
    parser.add_argument('--similarity', dest='similarity', default='cosine', choices=['cosine', 'l2'])
    parser.add_argument('--dim', dest='dim', default=128, type=int)
    parser.add_argument('--query_maxlen', dest='query_maxlen', default=32, type=int)
    parser.add_argument('--doc_maxlen', dest='doc_maxlen', default=180, type=int)

    # TODO: Add resume functionality

    args = parser.parse_args()

    # NOTE: this is an alias of the namespace itself, not a snapshot, so the
    # attributes added below are visible through ``input_arguments`` as well.
    args.input_arguments = args

    # The worker pool is handed to downstream code through the namespace; it
    # is not closed in this function.
    args.pool = Pool(10)

    create_directory(args.output_dir)

    # The index lives under the output directory, the collection under the
    # data directory.
    args.index = os.path.join(args.output_dir, args.index)
    args.collection = os.path.join(args.data_dir, args.collection)

    # ``load_colbert`` returns a pair; its second item replaces the
    # checkpoint path given on the command line.
    args.colbert, args.checkpoint = load_colbert(args)

    encode(args)
def __init__(self, checkpoint, model_name='bert-base-uncased', tokenizer_name='bert-base-uncased', doc_attr="body", verbose=False):
    """Assemble the settings object passed to ``load_colbert`` and keep it on the instance.

    Args:
        checkpoint: Stored as ``args.checkpoint`` before ``load_colbert`` is
            called; afterwards replaced by the second item that call returns.
        model_name: Stored as ``args.bert``.
        tokenizer_name: Stored as ``args.bert_tokenizer``.
        doc_attr: Stored unchanged as ``self.doc_attr``.
        verbose: Stored unchanged as ``self.verbose``.
    """
    cfg = Object()

    # Fixed hyperparameters, assigned in the same order as before.
    fixed_settings = (
        ('query_maxlen', 32),
        ('doc_maxlen', 180),
        ('dim', 128),
        ('bsize', 128),
        ('similarity', 'cosine'),
    )
    for attr_name, attr_value in fixed_settings:
        setattr(cfg, attr_name, attr_value)

    cfg.checkpoint = checkpoint
    cfg.pool = Pool(10)
    cfg.bert = model_name
    cfg.bert_tokenizer = tokenizer_name

    # load_colbert yields a pair: the model and the value that takes the
    # place of the checkpoint argument on the settings object.
    colbert, loaded_checkpoint = load_colbert(cfg)
    cfg.colbert = colbert
    cfg.checkpoint = loaded_checkpoint

    self.args = cfg
    self.doc_attr = doc_attr
    self.verbose = verbose
def main():
    """Command-line entry point for exhaustive re-ranking evaluation with ColBERT.

    Parses the command line, validates that short-circuiting is only requested
    together with qrels, resolves the top-k and qrels files relative to
    ``--data_dir``, loads the checkpoint, qrels and top-k data, reports recall
    through ``evaluate_recall`` and finally calls ``evaluate``.

    Invalid argument combinations are reported through ``parser.error``, which
    prints the usage message and exits with status 2.
    """
    random.seed(123456)

    parser = ArgumentParser(
        description="Exhaustive (non-index-based) evaluation of re-ranking with ColBERT.")

    parser.add_argument("--checkpoint", dest="checkpoint", required=True)
    parser.add_argument("--topk", dest="topK", default="top1000.dev")
    parser.add_argument("--qrels", dest="qrels", default="qrels.dev.small.tsv")
    parser.add_argument("--shortcircuit", dest="shortcircuit", default=False, action="store_true")

    parser.add_argument("--data_dir", dest="data_dir", default=DEFAULT_DATA_DIR)
    parser.add_argument("--output_dir", dest="output_dir", default="outputs.test/")

    parser.add_argument("--bsize", dest="bsize", default=128, type=int)
    parser.add_argument("--subsample", dest="subsample", default=None)  # TODO: Add this
    parser.add_argument("--dense", action="store_true")

    # TODO: For the following four arguments, default should be None. If None, they should be loaded from checkpoint.
    parser.add_argument("--similarity", dest="similarity", default="cosine", choices=["cosine", "l2"])
    parser.add_argument("--dim", dest="dim", default=128, type=int)
    parser.add_argument("--query_maxlen", dest="query_maxlen", default=32, type=int)
    parser.add_argument("--doc_maxlen", dest="doc_maxlen", default=180, type=int)

    parser.add_argument("--n", type=int, required=True)
    parser.add_argument("--k", type=float, required=True)
    # Passing the flag stores False in ``normalize_sparse`` (default True).
    parser.add_argument("--dont_normalize_sparse", dest="normalize_sparse", action="store_false")
    parser.add_argument("--use_nonneg", action="store_true")
    parser.add_argument("--use_ortho", action="store_true")

    args = parser.parse_args()

    # NOTE: this is an alias of the namespace itself, not a snapshot, so the
    # attributes added below are visible through ``input_arguments`` as well.
    args.input_arguments = args

    # Validate with parser.error rather than ``assert``: assertions are
    # stripped under ``python -O`` and would let the bad combination through.
    if args.shortcircuit and not args.qrels:
        parser.error(
            "Short-circuiting (i.e., applying minimal computation to queries with no positives [in the re-ranked set]) "
            "can only be applied if qrels is provided.")

    # The worker pool is handed to downstream code through the namespace; it
    # is not closed in this function.
    args.pool = Pool(10)

    # Keep the raw --topk value as the run name before it becomes a path.
    args.run_name = args.topK

    create_directory(args.output_dir)

    args.topK = os.path.join(args.data_dir, args.topK)

    if args.qrels:
        args.qrels = os.path.join(args.data_dir, args.qrels)

    # Remember the checkpoint path; ``args.checkpoint`` is overwritten by the
    # second item returned from ``load_colbert``.
    args.checkpoint_path = args.checkpoint
    args.colbert, args.checkpoint = load_colbert(args)

    args.qrels = load_qrels(args.qrels)
    args.queries, args.topK_docs, args.topK_pids = load_topK(args.topK)

    evaluate_recall(args.qrels, args.queries, args.topK_pids)
    evaluate(args)
def main():
    """Command-line entry point: encode a collection with ColBERT into an index.

    Parses the command line, resolves ``--index`` relative to ``--output_dir``
    and ``--collection`` relative to ``--data_dir``, loads the checkpoint with
    ``load_colbert`` and passes the populated namespace to ``encode``.
    """
    random.seed(123456)

    # The previous description was copied from the re-ranking evaluation
    # script and misdescribed this tool in ``--help``.
    parser = ArgumentParser(
        description="Encode a collection with ColBERT into an index.")

    parser.add_argument("--index", dest="index", required=True)
    parser.add_argument("--checkpoint", dest="checkpoint", required=True)
    parser.add_argument("--collection", dest="collection", default="collection.tsv")

    parser.add_argument("--data_dir", dest="data_dir", default=DEFAULT_DATA_DIR)
    parser.add_argument("--output_dir", dest="output_dir", default="outputs.index/")

    parser.add_argument("--bsize", dest="bsize", default=128, type=int)
    parser.add_argument("--bytes", dest="bytes", default=4, choices=[2, 4], type=int)
    parser.add_argument("--subsample", dest="subsample", default=None)  # TODO: Add this

    # TODO: For the following four arguments, default should be None. If None, they should be loaded from checkpoint.
    parser.add_argument("--similarity", dest="similarity", default="cosine", choices=["cosine", "l2"])
    parser.add_argument("--dim", dest="dim", default=128, type=int)
    parser.add_argument("--query_maxlen", dest="query_maxlen", default=32, type=int)
    parser.add_argument("--doc_maxlen", dest="doc_maxlen", default=180, type=int)

    parser.add_argument("--dense", action="store_true")
    parser.add_argument("--n", default=4096, type=int)
    parser.add_argument("--k", default=0.005, type=float)
    # Passing the flag stores False in ``normalize_sparse`` (default True).
    parser.add_argument("--dont_normalize_sparse", dest="normalize_sparse", action="store_false")

    # TODO: Add resume functionality

    args = parser.parse_args()

    # NOTE: this is an alias of the namespace itself, not a snapshot, so the
    # attributes added below are visible through ``input_arguments`` as well.
    args.input_arguments = args

    # The worker pool is handed to downstream code through the namespace; it
    # is not closed in this function.
    args.pool = Pool(4)

    create_directory(args.output_dir)

    # The index lives under the output directory, the collection under the
    # data directory.
    args.index = os.path.join(args.output_dir, args.index)
    args.collection = os.path.join(args.data_dir, args.collection)

    # ``load_colbert`` returns a pair; its second item replaces the
    # checkpoint path given on the command line.
    args.colbert, args.checkpoint = load_colbert(args)

    encode(args)
def main():
    """Command-line entry point for exhaustive re-ranking evaluation with ColBERT.

    Parses the command line, validates that short-circuiting is only requested
    together with qrels, resolves the top-k and (if given) qrels files relative
    to ``--data_dir``, loads the checkpoint, qrels and top-k data, reports
    recall through ``evaluate_recall`` and finally calls ``evaluate``.

    Invalid argument combinations are reported through ``parser.error``, which
    prints the usage message and exits with status 2.
    """
    random.seed(123456)

    parser = ArgumentParser(
        description='Exhaustive (non-index-based) evaluation of re-ranking with ColBERT.')

    parser.add_argument('--checkpoint', dest='checkpoint', required=True)
    parser.add_argument('--topk', dest='topK', required=True)
    parser.add_argument('--qrels', dest='qrels', default=None)
    parser.add_argument('--shortcircuit', dest='shortcircuit', default=False, action='store_true')

    parser.add_argument('--data_dir', dest='data_dir', default=DEFAULT_DATA_DIR)
    parser.add_argument('--output_dir', dest='output_dir', default='outputs.test/')

    parser.add_argument('--bsize', dest='bsize', default=128, type=int)
    parser.add_argument('--subsample', dest='subsample', default=None)  # TODO: Add this

    # TODO: For the following four arguments, default should be None. If None, they should be loaded from checkpoint.
    parser.add_argument('--similarity', dest='similarity', default='cosine', choices=['cosine', 'l2'])
    parser.add_argument('--dim', dest='dim', default=128, type=int)
    parser.add_argument('--query_maxlen', dest='query_maxlen', default=32, type=int)
    parser.add_argument('--doc_maxlen', dest='doc_maxlen', default=180, type=int)

    args = parser.parse_args()

    # NOTE: this is an alias of the namespace itself, not a snapshot, so the
    # attributes added below are visible through ``input_arguments`` as well.
    args.input_arguments = args

    # Validate with parser.error rather than ``assert``: assertions are
    # stripped under ``python -O`` and would let ``--shortcircuit`` through
    # without qrels.
    if args.shortcircuit and not args.qrels:
        parser.error(
            "Short-circuiting (i.e., applying minimal computation to queries with no positives [in the re-ranked set]) "
            "can only be applied if qrels is provided.")

    # The worker pool is handed to downstream code through the namespace; it
    # is not closed in this function.
    args.pool = Pool(10)

    # Keep the raw --topk value as the run name before it becomes a path.
    args.run_name = args.topK

    create_directory(args.output_dir)

    args.topK = os.path.join(args.data_dir, args.topK)

    # qrels is optional; only build a path when one was supplied.
    if args.qrels:
        args.qrels = os.path.join(args.data_dir, args.qrels)

    # ``load_colbert`` returns a pair; its second item replaces the
    # checkpoint path given on the command line.
    args.colbert, args.checkpoint = load_colbert(args)

    # NOTE(review): when --qrels is omitted this passes None to load_qrels and
    # its result on to evaluate_recall — presumably both accept that; confirm.
    args.qrels = load_qrels(args.qrels)
    args.queries, args.topK_docs, args.topK_pids = load_topK(args.topK)

    evaluate_recall(args.qrels, args.queries, args.topK_pids)
    evaluate(args)