def debug(
    task: str,
    dataset: str,
    old_rev: str,
    datapoint: Optional[int],
    metric: str,
    n_datapoints: Optional[int],
    reverse: bool,
    force_refresh: bool,
    verbose: bool = False,
) -> None:
    """Show debugging output for a dataset, or for one datapoint of it.

    When ``datapoint`` is ``None`` a ``DatasetDebugger`` is built and asked to
    show datapoints for ``metric``; otherwise a ``DataPointDebugger`` is built
    and asked to show info for that single datapoint.

    Args:
        task: Task identifier, passed through to the debugger.
        dataset: Dataset identifier, passed through to the debugger.
        old_rev: Revision passed through to the debugger.
        datapoint: Datapoint to inspect, or ``None`` for the dataset view.
        metric: Metric passed to ``show_datapoints`` (dataset view only).
        n_datapoints: How many datapoints to show; any falsy value
            (``None`` or ``0``) falls back to 10.
        reverse: Forwarded to ``show_datapoints`` as ``reverse=``.
        force_refresh: Forwarded to the debugger as ``force_update=``.
        verbose: Forwarded to ``setup_loggers``.

    Raises:
        SystemExit: With code 23 when ``dvc.scm.base.RevError`` is raised,
            and with code 24 when ``dvc.exceptions.PathMissingError`` is
            raised.
    """
    # ``sys.exit`` is used instead of the bare ``exit`` helper: the latter is
    # injected by the ``site`` module for interactive sessions and is not
    # guaranteed to exist (e.g. ``python -S`` or frozen executables).
    import sys

    from bohr.debugging import DataPointDebugger, DatasetDebugger

    setup_loggers(verbose)
    try:
        if datapoint is None:
            dataset_debugger = DatasetDebugger(
                task, dataset, old_rev, force_update=force_refresh
            )
            dataset_debugger.show_datapoints(
                metric, n_datapoints or 10, reverse=reverse
            )
        else:
            DataPointDebugger(
                task, dataset, old_rev, force_update=force_refresh
            ).show_datapoint_info(datapoint)
    except dvc.scm.base.RevError:
        logger.error(f"Revision does not exist: {old_rev}")
        sys.exit(23)
    except dvc.exceptions.PathMissingError:
        logger.error(f"Dataset {dataset} or task {task} does not exist.")
        sys.exit(24)
def label_dataset(task: str, dataset: str, debug: bool):
    """Resolve a task and dataset by name and run the labelling pipeline step.

    Args:
        task: Key looked up in the loaded repo's ``tasks`` mapping.
        dataset: Key looked up in the loaded repo's ``datasets`` mapping.
        debug: Forwarded to the pipeline function as ``debug=``.
    """
    # Aliased so the pipeline function does not shadow this command's name.
    from bohr.pipeline.label_dataset import label_dataset as run_label_dataset

    setup_loggers()
    repo = load_bohr_repo()
    resolved_task = repo.tasks[task]
    resolved_dataset = repo.datasets[dataset]
    run_label_dataset(resolved_task, resolved_dataset, debug=debug)
def train_label_model(task: str, target_dataset: str):
    """Train the label model for a task, then persist and print its stats.

    The stats returned by the pipeline function are written as JSON to
    ``<metrics>/<task name>/label_model_metrics.json`` (``metrics`` taken from
    the loaded ``PathConfig``) and then pretty-printed.

    Args:
        task: Key looked up in the loaded repo's ``tasks`` mapping.
        target_dataset: Key looked up in the loaded repo's ``datasets``
            mapping.
    """
    # Aliased so the pipeline function does not shadow this command's name.
    from bohr.pipeline.train_label_model import (
        train_label_model as run_train_label_model,
    )

    setup_loggers()
    repo = load_bohr_repo()
    paths = PathConfig.load()
    resolved_task = repo.tasks[task]
    resolved_dataset = repo.datasets[target_dataset]

    stats = run_train_label_model(resolved_task, resolved_dataset, paths)

    metrics_file = paths.metrics / resolved_task.name / "label_model_metrics.json"
    with open(metrics_file, "w") as out:
        json.dump(stats, out)
    pprint(stats)
def apply_heuristics(task: str, heuristic_group: Optional[str], dataset: Optional[str], profile: bool):
    """Apply one heuristic group to a dataset, or combine applied heuristics.

    With a truthy ``heuristic_group`` the dataset is resolved and the group is
    applied inside a ``Profiler`` context; otherwise the already-applied
    heuristics for the task are combined.

    Args:
        task: Key looked up in the loaded repo's ``tasks`` mapping.
        heuristic_group: Group to apply; a falsy value selects the combine
            step instead.
        dataset: Key looked up in the repo's ``datasets`` mapping; only read
            when ``heuristic_group`` is truthy.
        profile: Forwarded to ``Profiler`` as ``enabled=``.
    """
    # Aliased so the pipeline function does not shadow this command's name.
    from bohr.pipeline.apply_heuristics import apply_heuristics as run_apply_heuristics
    from bohr.pipeline.combine_heuristics import combine_applied_heuristics

    setup_loggers()
    repo = load_bohr_repo()
    resolved_task = repo.tasks[task]

    if not heuristic_group:
        combine_applied_heuristics(resolved_task)
        return

    # The dataset lookup stays inside the profiler context on purpose.
    with Profiler(enabled=profile):
        resolved_dataset = repo.datasets[dataset]
        run_apply_heuristics(resolved_task, heuristic_group, resolved_dataset)
def __enter__(self):
    """Record the verbosity in the local config, set up loggers, return self."""
    verbose_flag = self.verbose
    # The config entry is written as a string.
    add_to_local_config("core.verbose", str(verbose_flag))
    setup_loggers(verbose_flag)
    return self
def refresh(verbose: bool = False):
    """Set up loggers with the given verbosity, then call ``api.refresh()``.

    Args:
        verbose: Forwarded to ``setup_loggers``.
    """
    # Logging is configured first so anything emitted by the API call is covered.
    setup_loggers(verbose)
    # The result of the API call, if any, is intentionally not returned.
    api.refresh()
def status(verbose: bool = False):
    """Set up loggers, then print whatever ``api.status()`` returns.

    Args:
        verbose: Forwarded to ``setup_loggers``.
    """
    setup_loggers(verbose)
    report = api.status()
    print(report)
def parse_labels():
    """Set up loggers with default settings and run the parse-labels step."""
    # Aliased so the pipeline function does not shadow this command's name.
    from bohr.pipeline.parse_labels import parse_labels as run_parse_labels

    setup_loggers()
    run_parse_labels()