def make_numbers(**options):
    """Generate the model-results numbers through ``csevo.Table.Table``.

    Options read:
        model: forwarded unchanged (``None`` when the key is absent).
        use_latest: read through ``Utils.get_option_as_boolean``.
        debug: read through ``Utils.get_option_as_boolean``.
    """
    # Imported lazily, inside the function, like every other command here.
    from csevo.Table import Table

    numbers_table = Table()
    model_name = options.get("model")
    latest_flag = Utils.get_option_as_boolean(options, "use_latest")
    debug_flag = Utils.get_option_as_boolean(options, "debug")
    numbers_table.make_numbers_model_results(model_name, latest_flag, debug_flag)
def process_data_shared(**options):
    """Run ``DataProcessor.process_shared`` with options taken from ``options``.

    Options read:
        output_dir: destination directory; defaults to
            ``Macros.data_dir / "models-data"``.  Always converted to a
            ``pathlib.Path`` so the processor receives the same type whether
            the caller supplied the option (typically as a string) or the
            default was used.
        task: forwarded unchanged (``None`` when the key is absent).
        years: read through ``Utils.get_option_as_list``.
        eval_settings: read through ``Utils.get_option_as_list``.
    """
    # Local import keeps this block self-sufficient; Path() on an existing
    # Path is a no-op copy, so the default value is unaffected.
    from pathlib import Path

    from csevo.processor.DataProcessor import DataProcessor

    output_dir = Path(options.get("output_dir", Macros.data_dir / "models-data"))
    task = options.get("task")
    years = Utils.get_option_as_list(options, "years")
    eval_settings = Utils.get_option_as_list(options, "eval_settings")

    dp = DataProcessor()
    dp.process_shared(output_dir, years, eval_settings, task)
    return
def prepare_model_local(**options):
    """Prepare a model with ``LocalRunner``.

    Options read:
        work_dir: runner working directory, wrapped in ``Path``; defaults to
            ``Macros.data_dir / "models-work"``.
        model: required; a missing key raises ``KeyError``.
        use_latest, debug, cross_proj: read through
            ``Utils.get_option_as_boolean``.
    """
    from csevo.ml.LocalRunner import LocalRunner

    fallback_dir = Macros.data_dir / "models-work"
    runner_dir = Path(options.get("work_dir", fallback_dir))
    model_name = options["model"]
    latest_flag = Utils.get_option_as_boolean(options, "use_latest")
    debug_flag = Utils.get_option_as_boolean(options, "debug")
    cross_proj_flag = Utils.get_option_as_boolean(options, "cross_proj")

    local_runner = LocalRunner(runner_dir)
    local_runner.prepare_model(model_name, latest_flag, debug_flag, cross_proj_flag)
def store_repo_results(**options):
    """Store repository results via a ``DataCollector`` backed by a ``Database``.

    Options read:
        local: read through ``Utils.get_option_as_boolean``; passed to
            ``Database(local=...)``.
        force_update: read through ``Utils.get_option_as_boolean``; passed on
            to ``DataCollector.store_repo_results``.
        repos_results_dir: wrapped in ``Path``; defaults to
            ``Macros.repos_results_dir``.
    """
    from csevo.collector.Database import Database
    from csevo.collector.DataCollector import DataCollector

    use_local = Utils.get_option_as_boolean(options, "local")
    overwrite = Utils.get_option_as_boolean(options, "force_update")
    results_dir = Path(options.get("repos_results_dir", Macros.repos_results_dir))

    collector = DataCollector(database=Database(local=use_local))
    collector.store_repo_results(results_dir, force_update=overwrite)
def make_plots(**options):
    """Create the plots listed under the ``which`` option.

    ``which`` is read through ``Utils.get_option_as_list``; the complete
    ``options`` dict is handed to ``Plot.make_plots`` as well so each plot can
    pick up its own settings.
    """
    from csevo.Plot import Plot

    requested = Utils.get_option_as_list(options, "which")
    plotter = Plot()
    plotter.make_plots(requested, options)
def make_tables(**options):
    """Create the tables listed under the ``which`` option.

    ``which`` is read through ``Utils.get_option_as_list``; the complete
    ``options`` dict is handed to ``Table.make_tables`` as well.
    """
    from csevo.Table import Table

    requested = Utils.get_option_as_list(options, "which")
    tabulator = Table()
    tabulator.make_tables(requested, options)
def collect_model_results(**options):
    """Collect results for a model/task pair with ``ModelResultsCollector``.

    Options read:
        model, task: forwarded unchanged (``None`` when absent).
        re_eval: read through ``Utils.get_option_as_boolean``.
    """
    from csevo.collector.ModelResultsCollector import ModelResultsCollector

    results_collector = ModelResultsCollector()
    model_name = options.get("model")
    task_name = options.get("task")
    re_evaluate = Utils.get_option_as_boolean(options, "re_eval")
    results_collector.collect_results(model_name, task_name, re_evaluate)
def split_project_data(**options):
    """Invoke ``DataSpliter.project_data_split`` for the requested task.

    Options read:
        task: forwarded unchanged (``None`` when absent).
        debug: read through ``Utils.get_option_as_boolean``.
    """
    from csevo.filter.DataSpliter import DataSpliter

    data_spliter = DataSpliter()
    task_name = options.get("task")
    debug_flag = Utils.get_option_as_boolean(options, "debug")
    data_spliter.project_data_split(task_name, debug_flag)
def make_plots(self, which, options: dict):
    """Produce every plot named in ``which``.

    Each name is dispatched to the matching plotting method; the settings a
    plot needs are pulled out of ``options``.  A name that matches no known
    plot is reported through ``self.logger.warning`` and skipped.

    Recognised names: ``draft-learning-curve``,
    ``models-results-metrics-dist``, ``models-results-variance-dist`` and
    ``num-data-evolution``.
    """
    # The two distribution plots take exactly the same arguments.
    dist_plots = ("models-results-metrics-dist", "models-results-variance-dist")

    for plot_name in which:
        if plot_name == "draft-learning-curve":
            # TODO: outdated (->remove)
            log_path = Path(options.get("training-log-path"))
            out_name = options.get("output-name")
            self.make_plot_draft_learning_curve(log_path, out_name)
            continue

        if plot_name in dist_plots:
            task = options["task"]
            # Per-task defaults apply when the option is not given.
            models = Utils.get_option_as_list(options, "models", self.TASK_2_MODELS.get(task))
            metrics = Utils.get_option_as_list(options, "metrics", self.TASK_2_METRICS.get(task))
            exps = Utils.get_option_as_list(options, "exps", self.EXPS)
            if plot_name == "models-results-metrics-dist":
                self.plot_models_results_metrics_dist(task, models, metrics, exps)
            else:
                self.plot_models_results_variance_dist(task, models, metrics, exps)
            continue

        if plot_name == "num-data-evolution":
            years = Utils.get_option_as_list(options, "years", self.EVO_YEARS)
            self.plot_num_data_evolution(years)
            continue

        self.logger.warning(f"Unknown plot {plot_name}")
    # end for
    return
def run_models(**options):
    """Run models through ``TACCRunner``, either normally or locally.

    Options read:
        work_dir: wrapped in ``Path``; defaults to
            ``Macros.data_dir / "models-work"``.
        mode: defaults to ``Macros.train``.
        models, exps, trials: read through ``Utils.get_option_as_list``.
        timeout: forwarded unchanged (``None`` when absent).
        beg: defaults to ``0``.
        cnt: defaults to ``-1``.
        local: read through ``Utils.get_option_as_boolean``; when true
            ``run_models_local`` is called instead of ``run_models``.
    """
    from csevo.ml.TACCRunner import TACCRunner

    fallback_dir = Macros.data_dir / "models-work"
    runner_dir = Path(options.get("work_dir", fallback_dir))
    run_mode = options.get("mode", Macros.train)
    model_names = Utils.get_option_as_list(options, "models")
    experiments = Utils.get_option_as_list(options, "exps")
    trial_ids = Utils.get_option_as_list(options, "trials")
    time_limit = options.get("timeout")
    first = options.get("beg", 0)
    count = options.get("cnt", -1)
    run_locally = Utils.get_option_as_boolean(options, "local")

    tacc_runner = TACCRunner(runner_dir)
    if run_locally:
        tacc_runner.run_models_local(
            run_mode, model_names, experiments, trial_ids, time_limit, first, count
        )
    else:
        tacc_runner.run_models(
            run_mode, model_names, experiments, trial_ids, time_limit, first, count
        )
def split_dataset(**options):
    """Call ``DatasetSplitter.split_dataset`` on a ``Database``-backed splitter.

    Options read:
        local: read through ``Utils.get_option_as_boolean``; passed to
            ``Database(local=...)``.
    """
    from csevo.collector.Database import Database
    from csevo.processor.DatasetSplitter import DatasetSplitter

    use_local = Utils.get_option_as_boolean(options, "local")
    database = Database(local=use_local)
    splitter = DatasetSplitter(database=database)
    splitter.split_dataset()
def split_projects(**options):
    """Call ``DataSpliter.split_project`` on the method-data file.

    Options read:
        random_seed: forwarded unchanged (``None`` when absent).
        debug: read through ``Utils.get_option_as_boolean``; selects the
            ``latest-debug`` directory under ``Macros.data_dir`` instead of
            ``latest`` and is also passed on to the splitter.
    """
    from csevo.filter.DataSpliter import DataSpliter

    data_spliter = DataSpliter()
    seed = options.get("random_seed")
    debug_flag = Utils.get_option_as_boolean(options, "debug")

    # Only the sub-directory differs between the debug and regular inputs.
    snapshot_dir = "latest-debug" if debug_flag else "latest"
    method_file = Macros.data_dir / snapshot_dir / "method-data.json"

    data_spliter.split_project(method_file, seed, debug_flag)
def prepare_model(**options):
    """Prepare a model with ``TACCRunner``.

    Options read:
        work_dir: wrapped in ``Path``; defaults to
            ``Macros.data_dir / "models-work"``.
        model, year, eval_setting: required; a missing key raises
            ``KeyError``.
        debug: read through ``Utils.get_option_as_boolean``.
    """
    from csevo.ml.TACCRunner import TACCRunner

    fallback_dir = Macros.data_dir / "models-work"
    runner_dir = Path(options.get("work_dir", fallback_dir))
    model_name = options["model"]
    target_year = options["year"]
    setting = options["eval_setting"]
    debug_flag = Utils.get_option_as_boolean(options, "debug")

    tacc_runner = TACCRunner(runner_dir)
    tacc_runner.prepare_model(model_name, target_year, setting, debug_flag)
def collect_data(**options):
    """Collect the projects listed in a URLs file with ``DataCollector``.

    Options read:
        project_urls_file: wrapped in ``Path``; defaults to
            ``Macros.data_dir / "projects-github-dpcom.txt"``.
        skip_collected: read through ``Utils.get_option_as_boolean``.
        beg, cnt: forwarded unchanged (``None`` when absent).
    """
    from csevo.collector.DataCollector import DataCollector

    default_urls = Macros.data_dir / "projects-github-dpcom.txt"
    urls_file = Path(options.get("project_urls_file", default_urls))
    skip_done = Utils.get_option_as_boolean(options, "skip_collected")
    first = options.get("beg")
    count = options.get("cnt")

    collector = DataCollector()
    collector.collect_projects(
        urls_file,
        skip_collected=skip_done,
        beg=first,
        cnt=count,
    )