from capreolus import ConfigOption, Dependency, evaluator
from capreolus.task import Task
from capreolus.utils.loginit import get_logger

logger = get_logger(__name__)  # pylint: disable=invalid-name


@Task.register
class TutorialTask(Task):
    """Task module named "tutorial", registered through ``Task.register``.

    The class declares one config option (``optimize``) and three
    dependencies: a benchmark (key ``benchmark``) and two searchers (keys
    ``searcher1`` and ``searcher2``). ``run`` is both the only task-specific
    command and the default command.
    """

    module_name = "tutorial"
    # NOTE(review): the second positional argument ("map") is presumably the
    # option's default value -- confirm against ConfigOption's signature.
    config_spec = [ConfigOption("optimize", "map", "metric to maximize on the validation set")]
    # NOTE(review): `name` presumably selects the default module for each
    # dependency, and `provide_this` / `provide_children` presumably control
    # sharing the benchmark (and its "collection" child) with the other
    # dependencies -- confirm against the Dependency documentation.
    dependencies = [
        Dependency(key="benchmark", module="benchmark", name="nf", provide_this=True, provide_children=["collection"]),
        Dependency(key="searcher1", module="searcher", name="BM25RM3"),
        Dependency(key="searcher2", module="searcher", name="SDM"),
    ]

    # "run" is listed first, followed by the commands inherited from Task.help_commands.
    commands = ["run"] + Task.help_commands
    default_command = "run"

    def run(self):
        """Entry point for the ``run`` command.

        Only the first statement of this method is visible in this chunk: it
        fetches the results path via ``self.get_results_path()``. The
        remainder of the body is outside the visible source.
        """
        output_dir = self.get_results_path()

        # read the title queries from the chosen benchmark's topic file
import contextlib

import numpy as np
import torch
from torch import nn
from transformers import BertForNextSentencePrediction

from capreolus import ConfigOption, Dependency
from capreolus.reranker import Reranker
from capreolus.utils.loginit import get_logger

# Module-level logger named after this module.
logger = get_logger(__name__)

# The official weights were converted with the snippet below. It is kept for
# reference only and is not executed by this module.
#
# def convert(name):
#     from transformers import BertTokenizer, BertForNextSentencePrediction, TFBertForNextSentencePrediction
#
#     tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")
#     state = torch.load(f"/GW/NeuralIR/nobackup/birch-emnlp_bert4ir_v2/models/saved.{name}_1", map_location="cpu")
#     model = BertForNextSentencePrediction.from_pretrained("bert-large-uncased")
#     model.load_state_dict(state["model"].state_dict())
#     output = f"/GW/NeuralIR/nobackup/birch-emnlp_bert4ir_v2/models/export/birch-bert-large-{name}"
#     os.makedirs(output, exist_ok=True)
#     model.save_pretrained(output)
#     tokenizer.save_pretrained(output)
#     # tf2 support
#     tf_model = TFBertForNextSentencePrediction.from_pretrained(output, from_pt=True)
#     tf_model.save_pretrained(output)