def cli(port, workdir, dataset, debug):
    """Run a BM25 indexing + retrieval experiment.

    Args:
        port: Port for the experimaestro monitoring server.
        workdir: Working directory of the experiment.
        dataset: Identifier of the dataset to prepare and index.
        debug: If True, log at DEBUG level instead of INFO.
    """
    logging.getLogger().setLevel(logging.DEBUG if debug else logging.INFO)

    # Sets the working directory and the name of the xp
    with experiment(workdir, "bm25", port=port) as xp:
        # Anserini is Java-based: propagate JAVA_HOME to the tasks
        xp.setenv("JAVA_HOME", os.environ["JAVA_HOME"])

        # Index the collection
        ds = prepare_dataset(dataset)
        documents = ds.documents
        index = IndexCollection(
            documents=documents,
            storePositions=True,
            storeDocvectors=True,
            storeContents=True,
            threads=CPU_COUNT,
        ).submit()

        # Search with BM25 (top-1500 per topic)
        bm25_retriever = AnseriniRetriever(k=1500, index=index, model=BM25()).tag(
            "model", "bm25"
        )
        bm25_eval = Evaluate(dataset=ds, retriever=bm25_retriever).submit()

    # Leaving the experiment context waits for submitted jobs, so the
    # evaluation results file is available here.
    print("BM25 results on TREC 1")
    print(bm25_eval.results.read_text())
def process(processors, debug, gpu, port, workdir, max_epoch, batch_size, grad_acc_batch):
    """Runs an experiment

    Trains and evaluates neural rankers against a BM25 baseline.

    Args:
        processors: Callables invoked with the shared `Information` object;
            they are expected to populate `info.datasets` and `info.rankers`
            (enforced by the asserts below).
        debug: If True, log at DEBUG level instead of INFO.
        gpu: Whether to request a GPU device.
        port: Port for the experimaestro monitoring server.
        workdir: Working directory of the experiment.
        max_epoch: Maximum number of training epochs (tagged for tracking).
        batch_size: Reranker batch size.
        grad_acc_batch: Gradient-accumulation batch size for the trainer.
    """
    logging.info("Running pipeline")
    logging.getLogger().setLevel(logging.DEBUG if debug else logging.INFO)

    info = Information()
    info.device = device = Device(gpu=gpu)

    # Sets the working directory and the name of the xp
    with experiment(workdir, "neural-ir", port=port) as xpm:
        # Misc settings
        random = Random()
        assert "JAVA_HOME" in os.environ, "JAVA_HOME should be defined (to call anserini)"
        xpm.setenv("JAVA_HOME", os.environ["JAVA_HOME"])

        # Prepare the embeddings
        info.device = device
        for processor in processors:
            processor(info)

        assert info.datasets, "No dataset was selected"
        assert info.rankers, "No model was selected"

        # Each dataset entry is a (train, validation, test) triple
        for train, val, test in info.datasets:
            # Search and evaluate with BM25
            bm25_search = (
                SearchCollection(
                    index=test.index,
                    topics=test.assessed_topics.topics,
                    model=BM25(),
                )
                .tag("model", "bm25")
                .submit()
            )
            bm25_eval = TrecEval(
                assessments=test.assessed_topics.assessments, run=bm25_search
            ).submit()

            # Train and evaluate with each model
            for ranker in info.rankers:
                # Train with OpenNIR DRMM model
                predictor = Reranker(device=device, batch_size=batch_size)
                trainer = PointwiseTrainer(device=device, grad_acc_batch=grad_acc_batch)
                learner = Learner(
                    trainer=trainer,
                    random=random,
                    ranker=ranker,
                    valid_pred=predictor,
                    train_dataset=train,
                    val_dataset=val,
                    max_epoch=tag(max_epoch),
                )
                model = learner.submit()

                # Evaluate the neural model
                evaluate = Evaluate(
                    dataset=test, model=model, predictor=predictor
                ).submit()

        # Block until all submitted jobs have finished before reading results
        xpm.wait()
        # NOTE(review): only the results of the LAST (dataset, ranker) pair are
        # printed here, since `evaluate`/`bm25_eval` are overwritten each
        # iteration — confirm this is intended.
        print(f"Results for DRMM\n{evaluate.results.read_text()}\n")
        print(f"Results for BM25\n{bm25_eval.results.read_text()}\n")
def cli(port, workdir, debug):
    """Run an indexing experiment on a random dataset.

    Args:
        port: Port for the experimaestro monitoring server.
        workdir: Working directory of the experiment.
        debug: If True, log at DEBUG level instead of INFO.
    """
    logging.getLogger().setLevel(logging.DEBUG if debug else logging.INFO)

    # Sets the working directory and the name of the xp
    with experiment(workdir, "index", port=port) as xp:
        # NOTE(review): the dataset object is constructed but not referenced
        # afterwards — presumably instantiation registers tasks with the
        # experiment; confirm against RandomDataset's implementation.
        train_ds = RandomDataset()
def __enter__(self) -> experiment:
    """Start the wrapped experiment and return it.

    When `clean_workdir` is set, the working directory is a fresh
    temporary directory whose lifetime is tied to this context.
    """
    if not self.clean_workdir:
        xp_dir = self.workdir
    else:
        self.workdir = TemporaryDirectory(prefix="xpm", suffix=self.name)
        xp_dir = self.workdir.__enter__()

    self.experiment = experiment(xp_dir, self.name, port=self.port)
    self.experiment.__enter__()

    # Set some useful environment variables
    repo_root = Path(__file__).parents[2]
    self.experiment.workspace.launcher.setenv("PYTHONPATH", str(repo_root))

    self.timeout.__enter__()
    logging.info("Created new temporary experiment (%s)", xp_dir)
    return self.experiment
def on_button_clicked(self, b):
    """Toggle the global experiment: stop it if one is running, otherwise start one."""
    # All prints/log output are captured by the widget's output area
    with self.output:
        if experiment.CURRENT:
            # An experiment is running: shut it down
            try:
                experiment.CURRENT.__exit__(None, None, None)
            except Exception:
                # Best-effort shutdown; surface the failure in the widget output
                print("Error while stopping experimaestro")
            # NOTE(review): `current` is set to the just-exited experiment —
            # presumably so the UI can still inspect it; confirm intent.
            self.current = experiment.CURRENT
        else:
            # No experiment running: start one on localhost
            self.current = experiment(
                self.name, self.name, host="localhost", port=self.port
            ).__enter__()
            # Propagate the configured environment variables to the experiment
            for key, value in self.environment.items():
                self.current.setenv(key, value)
            # Optional user callback run after startup
            if self.hook:
                self.hook(self)
        # Refresh the widget to reflect the new state
        self.refresh()
def process(processors, debug, gpu, port, workdir, max_epoch, batch_size):
    """Run the neural-IR pipeline: BM25 and random baselines plus trained scorers.

    Args:
        processors: Callables invoked with the shared `Information` object;
            they must populate `info.trainer`, `info.train_sampler`,
            `info.dev`, `info.test` and `info.scorers` (checked below).
        debug: If True, log at DEBUG level instead of INFO.
        gpu: Whether to request a GPU device.
        port: Port for the experimaestro monitoring server.
        workdir: Working directory of the experiment.
        max_epoch: Maximum number of training epochs (tagged for tracking).
        batch_size: Batch size of the two-stage reranking retriever.
    """
    logging.info("Running pipeline")
    logging.getLogger().setLevel(logging.DEBUG if debug else logging.INFO)

    info = Information()
    info.device = device = Device(gpu=gpu)

    # Sets the working directory and the name of the xp
    with experiment(workdir, "neural-ir", port=port) as xpm:
        # Misc settings

        # Prepare the embeddings
        info.device = device
        for processor in processors:
            processor(info)

        assert info.trainer, "No trainer was selected"
        assert info.train_sampler, "No train sampler was selected"
        assert info.dev, "No dev dataset was selected"
        assert info.test, "No test dataset was selected"

        random_scorer = RandomScorer(random=info.random).tag("model", "random")

        # Retrieve the top 1000
        topK = 1000
        # 100 documents used for cross-validation
        valtopK = 100

        def get_retriever(index, scorer, topk=topK):
            """First-stage Anserini retrieval followed by neural reranking."""
            base_retriever = AnseriniRetriever(k=topk, index=index, model=info.basemodel)
            return TwoStageRetriever(
                retriever=base_retriever, scorer=scorer, batchsize=batch_size
            )

        val_index, test_index = [
            info.index(c.documents) for c in (info.dev, info.test)
        ]

        # Search and evaluate with BM25
        bm25_retriever = AnseriniRetriever(
            k=topK, index=test_index, model=info.basemodel
        ).tag("model", "bm25")
        bm25_eval = Evaluate(dataset=info.test, retriever=bm25_retriever).submit()

        # Performance of random
        random_eval = Evaluate(
            dataset=info.test, retriever=get_retriever(test_index, random_scorer)
        ).submit()

        # Train and evaluate with each model
        for scorer in info.scorers:
            trainer = info.trainer
            # Validation reranks a smaller candidate pool (valtopK)
            validation = Validation(
                dataset=info.dev,
                retriever=get_retriever(val_index, scorer, valtopK),
            )
            learner = Learner(
                trainer=trainer,
                random=info.random,
                scorer=scorer,
                max_epoch=tag(max_epoch),
                validation=validation,
            )
            model = learner.submit()

            # Evaluate the neural model
            evaluate = Evaluate(
                dataset=info.test, retriever=get_retriever(test_index, model)
            ).submit()

        # Block until all submitted jobs have finished before reading results
        xpm.wait()
        # NOTE(review): only the last scorer's `evaluate` is printed — the
        # loop overwrites it each iteration; confirm this is intended.
        print("===")
        print(f"Results for BM25\n{bm25_eval.results.read_text()}\n")
        print(f"Results for DRMM\n{evaluate.results.read_text()}\n")
        print(f"Results for random\n{random_eval.results.read_text()}\n")
import importlib
import sys

from experimaestro import experiment


def _restart(wspath, module_name, function_name):
    """Import `function_name` from `module_name` and run it in a fresh experiment."""
    print("Importing", module_name)
    target = getattr(importlib.import_module(module_name), function_name)
    with experiment(wspath, "restart") as xp:
        target(xp)


if __name__ == "__main__":
    # Expects exactly three arguments: workspace path, module, function name
    wspath, module, functionname = sys.argv[1:]
    _restart(wspath, module, functionname)