def init_query_encoder(encoder, topics_name, encoded_queries, device):
    """Choose and build a query encoder from the supplied options.

    The three sources are tried in priority order; the first one that is
    usable wins.

    Args:
        encoder: When truthy, an encoder class is picked by the first
            substring found in it ('dpr', 'tct_colbert', 'ance',
            'sentence'), falling back to ``AutoQueryEncoder``. The class is
            called with ``encoder_dir=encoder`` and ``device=device``.
        topics_name: Key looked up in the built-in table of encoded-query
            names; consulted only when the other two sources are falsy.
        encoded_queries: When truthy, handed to ``QueryEncoder`` directly if
            it names an existing filesystem path, otherwise handed to
            ``QueryEncoder.load_encoded_queries``.
        device: Forwarded unchanged to the encoder constructor.

    Returns:
        The constructed encoder, or ``None`` when no source applies.
    """
    # 1) An explicit encoder name/path takes precedence over everything else.
    if encoder:
        if 'dpr' in encoder:
            encoder_cls = DPRQueryEncoder
        elif 'tct_colbert' in encoder:
            encoder_cls = TCTColBERTQueryEncoder
        elif 'ance' in encoder:
            encoder_cls = AnceQueryEncoder
        elif 'sentence' in encoder:
            encoder_cls = SBERTQueryEncoder
        else:
            encoder_cls = AutoQueryEncoder
        return encoder_cls(encoder_dir=encoder, device=device)

    # 2) Explicit encoded queries: an existing path is wrapped directly,
    #    anything else is treated as a name for load_encoded_queries.
    if encoded_queries:
        if not os.path.exists(encoded_queries):
            return QueryEncoder.load_encoded_queries(encoded_queries)
        return QueryEncoder(encoded_queries)

    # 3) Fall back to the encoded-query name registered for this topic set.
    default_encodings = {
        'msmarco-passage-dev-subset': 'msmarco-passage-dev-subset-tct_colbert',
        'dpr-nq-dev': 'dpr-nq-dev-multi',
        'dpr-nq-test': 'dpr-nq-test-multi',
        'dpr-trivia-dev': 'dpr-trivia-dev-multi',
        'dpr-trivia-test': 'dpr-trivia-test-multi',
        'dpr-wq-test': 'dpr-wq-test-multi',
        'dpr-squad-test': 'dpr-squad-test-multi',
        'dpr-curated-test': 'dpr-curated-test-multi',
    }
    if topics_name not in default_encodings:
        return None
    return QueryEncoder.load_encoded_queries(default_encodings[topics_name])
def init_query_encoder(encoder, topics_name, encoded_queries, device):
    """Choose and build a query encoder from the supplied options.

    The three sources are tried in priority order; the first one that is
    usable wins.

    Args:
        encoder: When truthy, an encoder class is picked by the first
            substring found in it ('dpr', 'tct_colbert', 'ance',
            'sentence'), falling back to ``AutoQueryEncoder``. A 'sentence'
            match also uses ``AutoQueryEncoder`` but adds
            ``pooling='mean'`` and ``l2_norm=True``.
        topics_name: Key looked up in the built-in table of encoded-query
            names; consulted only when the other two sources are falsy.
        encoded_queries: When truthy, handed to ``QueryEncoder`` directly if
            it names an existing filesystem path, otherwise handed to
            ``QueryEncoder.load_encoded_queries``.
        device: Forwarded unchanged to the encoder constructor.

    Returns:
        The constructed encoder, or ``None`` when no source applies.
    """
    # 1) An explicit encoder name/path takes precedence over everything else.
    if encoder:
        ctor_kwargs = {'encoder_dir': encoder, 'device': device}
        if 'dpr' in encoder:
            encoder_cls = DprQueryEncoder
        elif 'tct_colbert' in encoder:
            encoder_cls = TctColBertQueryEncoder
        elif 'ance' in encoder:
            encoder_cls = AnceQueryEncoder
        elif 'sentence' in encoder:
            encoder_cls = AutoQueryEncoder
            ctor_kwargs.update(pooling='mean', l2_norm=True)
        else:
            encoder_cls = AutoQueryEncoder
        return encoder_cls(**ctor_kwargs)

    # 2) Explicit encoded queries: an existing path is wrapped directly,
    #    anything else is treated as a name for load_encoded_queries.
    if encoded_queries:
        if not os.path.exists(encoded_queries):
            return QueryEncoder.load_encoded_queries(encoded_queries)
        return QueryEncoder(encoded_queries)

    # 3) Fall back to the encoded-query name registered for this topic set.
    default_encodings = {
        'msmarco-passage-dev-subset': 'tct_colbert-msmarco-passage-dev-subset',
        'dpr-nq-dev': 'dpr_multi-nq-dev',
        'dpr-nq-test': 'dpr_multi-nq-test',
        'dpr-trivia-dev': 'dpr_multi-trivia-dev',
        'dpr-trivia-test': 'dpr_multi-trivia-test',
        'dpr-wq-test': 'dpr_multi-wq-test',
        'dpr-squad-test': 'dpr_multi-squad-test',
        'dpr-curated-test': 'dpr_multi-curated-test',
    }
    if topics_name not in default_encodings:
        return None
    return QueryEncoder.load_encoded_queries(default_encodings[topics_name])
def init_query_encoder(encoder, tokenizer_name, topics_name, encoded_queries, device, prefix):
    """Choose and build a query encoder from the supplied options.

    The three sources are tried in priority order; the first one that is
    usable wins.

    Args:
        encoder: When truthy, an encoder class is picked by the first
            substring found in it ('dkrr', 'dpr', 'bpr', 'tct_colbert',
            'ance', 'sentence'), falling back to ``AutoQueryEncoder``. A
            'sentence' match also uses ``AutoQueryEncoder`` but adds
            ``pooling='mean'`` and ``l2_norm=True``.
        tokenizer_name: Forwarded to every encoder constructor except the
            'dkrr' one.
        topics_name: Key looked up in the built-in table of encoded-query
            names; consulted only when the other two sources are falsy.
        encoded_queries: When truthy and an existing filesystem path, it is
            wrapped by ``BprQueryEncoder`` if it contains 'bpr', else by
            ``QueryEncoder``. When truthy but not an existing path, it is
            handed to ``QueryEncoder.load_encoded_queries``.
        device: Forwarded unchanged to the encoder constructor.
        prefix: Forwarded only to ``DkrrDprQueryEncoder``.

    Returns:
        The constructed encoder.

    Raises:
        ValueError: If no source applies, i.e. ``encoder`` and
            ``encoded_queries`` are falsy and ``topics_name`` is not in the
            built-in table.
    """
    # 1) An explicit encoder name/path takes precedence over everything else.
    if encoder:
        # 'dkrr' is tested before 'dpr', so a name containing both takes this
        # branch; it is also the only branch with a different keyword set.
        if 'dkrr' in encoder:
            return DkrrDprQueryEncoder(encoder_dir=encoder, device=device, prefix=prefix)

        ctor_kwargs = {
            'encoder_dir': encoder,
            'tokenizer_name': tokenizer_name,
            'device': device,
        }
        if 'dpr' in encoder:
            encoder_cls = DprQueryEncoder
        elif 'bpr' in encoder:
            encoder_cls = BprQueryEncoder
        elif 'tct_colbert' in encoder:
            encoder_cls = TctColBertQueryEncoder
        elif 'ance' in encoder:
            encoder_cls = AnceQueryEncoder
        elif 'sentence' in encoder:
            encoder_cls = AutoQueryEncoder
            ctor_kwargs.update(pooling='mean', l2_norm=True)
        else:
            encoder_cls = AutoQueryEncoder
        return encoder_cls(**ctor_kwargs)

    # 2) Explicit encoded queries: an existing path is wrapped directly,
    #    anything else is treated as a name for load_encoded_queries.
    if encoded_queries:
        if not os.path.exists(encoded_queries):
            return QueryEncoder.load_encoded_queries(encoded_queries)
        if 'bpr' in encoded_queries:
            return BprQueryEncoder(encoded_query_dir=encoded_queries)
        return QueryEncoder(encoded_queries)

    # 3) Fall back to the encoded-query name registered for this topic set.
    default_encodings = {
        'msmarco-passage-dev-subset': 'tct_colbert-msmarco-passage-dev-subset',
        'dpr-nq-dev': 'dpr_multi-nq-dev',
        'dpr-nq-test': 'dpr_multi-nq-test',
        'dpr-trivia-dev': 'dpr_multi-trivia-dev',
        'dpr-trivia-test': 'dpr_multi-trivia-test',
        'dpr-wq-test': 'dpr_multi-wq-test',
        'dpr-squad-test': 'dpr_multi-squad-test',
        'dpr-curated-test': 'dpr_multi-curated-test',
    }
    if topics_name not in default_encodings:
        raise ValueError(f'No encoded queries for topic {topics_name}')
    return QueryEncoder.load_encoded_queries(default_encodings[topics_name])