def executeTask(t):
    """Resolve the task function's annotated parameters against the experiment
    config, run the function inside a dedicated task folder, and record that
    folder on the task result object.

    NOTE(review): `exp`, `context`, `project`, `args` and `self` are free names
    here — this looks like a closure/bound helper; confirm the enclosing scope
    supplies them.
    """
    actualArgs = {}
    config = exp.parse_config()
    context.projectPath = project.path
    for p in self.sig.parameters:
        par = self.sig.parameters[p]
        # renamed from `type` — the original shadowed the builtin
        annotation = par.annotation
        if issubclass(annotation, musket_core.model.Model):
            # Model parameter: wrap the config with the caller-supplied model
            # spec when one was given, otherwise pass the raw config.
            if p in args:
                actualArgs[p] = args[p].wrap(config, exp)
            else:
                actualArgs[p] = config
        elif issubclass(annotation, musket_core.datasets.DataSet):
            actualArgs[p] = config.get_dataset(args[p])
            # remember the requested name when the dataset object carries none
            if not hasattr(actualArgs[p], "name") and not hasattr(
                    actualArgs[p], "origName"):
                actualArgs[p].name = args[p]
        else:
            # plain values are forwarded as-is when supplied
            if p in args:
                actualArgs[p] = args[p]
    # each task runs in its own folder next to the experiment config
    taskFolder = os.path.join(os.path.dirname(config.path), self.name)
    utils.ensure(taskFolder)
    os.chdir(taskFolder)
    self.func(**actualArgs)
    t.results = taskFolder
def calculate(self) -> DataSet:
    """Compute (or load from cache) the predictions dataset for this request.

    Resolution order: an explicit source dataset wins; otherwise the name is
    resolved against the config ("holdout"/"validation" are special-cased).
    Results are cached as an .npy-backed writeable dataset keyed by
    name+stage+fold; an existing file short-circuits all computation.
    When `separatePredictions` is set and multiple folds/stages are requested,
    each is predicted separately and the results are blended.
    """
    ds = self.srcDataset
    nm = self.name
    if ds is None:
        # no explicit dataset: resolve by name against the config
        if self.name is not None and isinstance(self.name, str):
            if self.name == "holdout":
                ds = self.cfg.holdout()
            elif self.name == "validation":
                ds = self.cfg.validation(None, self.fold)
            else:
                ds = self.cfg.get_dataset(self.name)
    else:
        nm = ds.name
    if ds is None:
        raise ValueError("No dataset has been specified for prediction")
    ensure(constructPredictionsDirPath(self.cfg.directory()))
    # cache file is keyed by dataset name + stage + fold
    path = f"{constructPredictionsDirPath(self.cfg.directory())}/{nm}{str(self.stage)}{str(self.fold)}.npy"
    if os.path.exists(path):
        # cached predictions exist — reuse them
        return self.cfg.load_writeable_dataset(ds, path)
    if self.cfg.separatePredictions:
        if isinstance(self.fold, list) and len(self.fold) > 1:
            # predict each fold separately, then blend into one dataset
            prs = [
                Prediction(self.cfg, v, self.stage, self.name,
                           self.srcDataset).calculate() for v in self.fold
            ]
            vls = self.cfg.createPredictionsBlend(prs)
            wd = self.cfg.create_writeable_dataset(vls, path)
            for i in tqdm(vls, "Blending"):
                wd.append(i.prediction)
            wd.commit()
            return self.cfg.load_writeable_dataset(ds, path)
        if isinstance(self.stage, list) and len(self.stage) > 1:
            # predict each stage separately, then blend into one dataset
            prs = [
                Prediction(self.cfg, self.fold, v, self.name,
                           self.srcDataset).calculate() for v in self.stage
            ]
            vls = self.cfg.createPredictionsBlend(prs)
            wd = self.cfg.create_writeable_dataset(vls, path)
            for i in tqdm(vls, "Blending"):
                wd.append(i.prediction)
            wd.commit()
            return self.cfg.load_writeable_dataset(ds, path)
        pass
    if self.cfg.needsSessionForPrediction:
        #K.clear_session()
        try:
            # run prediction inside a fresh session; always clear it after,
            # even if prediction raises
            with self.create_session().as_default():
                value = self.cfg.predict_all_to_dataset(
                    ds, self.fold, self.stage, -1, None, False, path)
        finally:
            K.clear_session()
    else:
        value = self.cfg.predict_all_to_dataset(ds, self.fold, self.stage,
                                                -1, None, False, path)
    return value
def __init__(self, project: musket_projects.Project):
    """Create the task for *project* and make sure its report directory exists."""
    tasks.Task.__init__(self)
    self.process = None
    self.project = project
    musket_utils.ensure(self.report_dir())
def apply(self, all=False):
    """Expand this experiment into its concrete sub-experiments.

    When the config declares `num_seeds`, one sub-experiment folder per seed
    is created (with a random `testSplitSeed` dumped into each) and the list
    of Experiment objects is returned; otherwise `[self]` is returned.

    NOTE(review): the parameter `all` shadows the builtin; when False,
    already-completed sub-experiments are skipped.
    """
    # experiments with hyperparameter search are not expanded here
    if self.hyperparameters() is not None:
        return [self]
    m = self.config()
    if "num_seeds" in m:
        paths = []
        for i in range(m["num_seeds"]):
            i_ = self.path + "/" + str(i)
            ensure(i_)
            if not all:
                # skip sub-experiments that already finished
                if Experiment(i_).isCompleted():
                    continue
            s = random.randint(0, 100000)
            # only (re)write the config when expanding lazily or when the
            # concrete config file does not exist yet
            if not all or not os.path.exists(
                    constructConfigYamlConcretePath(i_)):
                self.dumpTo(i_, {"testSplitSeed": s}, ["num_seeds"])
            # propagate launch settings to the child experiment
            e = Experiment(i_, self.allowResume)
            e.gpus = self.gpus
            e.onlyReports = self.onlyReports
            e.launchTasks = self.launchTasks
            e.project = self.project
            paths.append(e)
        return paths
    return [self]
def collect_results(project_id, experiment=None):
    """Copy the result files of *project_id* (optionally restricted to one
    experiment) into a fresh `zip` temp folder and archive them as `project`.

    Missing source files are skipped. The temp folder is recreated on each
    call so stale results never leak into the archive.
    """
    workspace_dir = workspace_folder()
    files = project_results(project_id, experiment)
    results_folder = project_results_folder(project_id)
    temp = os.path.join(results_folder, "zip")
    # start from a clean temp folder
    if os.path.exists(temp):
        shutil.rmtree(temp)
    utils.ensure(temp)
    for item in files:
        src = os.path.join(workspace_dir, item)
        # log once per item (the original printed this twice per copied file)
        print("collecting: " + src)
        if not os.path.exists(src):
            continue
        dst = os.path.join(temp, item)
        parent = os.path.dirname(dst)
        if not os.path.exists(parent):
            utils.ensure(parent)
        shutil.copy(src, dst)
    utils.archive(os.path.join(temp, project_id),
                  os.path.join(results_folder, "project"))
def load_item(url, dest):
    """Download the dependency at *url* into *dest* via the matching loader."""
    utils.ensure(dest)
    parsed = parse_url(url)
    build_loader(parsed).load(parsed["url"], dest)
def __init__(self, project: musket_projects.Project, experiment=None):
    """Create the task for *project* (optionally bound to *experiment*)
    and make sure its report directory exists."""
    tasks.Task.__init__(self)
    self.process = None
    self.project = project
    self.experiment = experiment
    musket_utils.ensure(self.report_dir())
def get_cache_dir():
    """Return the cache directory, creating it on demand.

    The module-level CACHE_DIR override wins when set; otherwise a `.cache/`
    folder inside the current project (falling back to the CWD when no
    project is active).
    """
    if CACHE_DIR is not None:
        return CACHE_DIR
    base = context.get_current_project_path()
    if base is None:
        base = os.getcwd()
    cache_dir = os.path.join(base, ".cache/")
    utils.ensure(cache_dir)
    return cache_dir
def copytree(src, dst):
    """Recursively copy *src* into *dst*, overwriting files that already exist."""
    utils.ensure(dst)
    for entry in listdir(src):
        source = os.path.join(src, entry)
        target = os.path.join(dst, entry)
        if os.path.isdir(source):
            copytree(source, target)
            continue
        # plain file: replace any existing copy
        if os.path.exists(target):
            os.remove(target)
        shutil.copy(source, target)
def report(self):
    """Render every existing per-fold/per-stage metrics log into an HTML report
    under each sub-experiment's `reports` folder."""
    cf = self.parse_config()
    template_path = os.path.join(os.path.dirname(__file__), "templates",
                                 "logs.html")
    template = load_string(template_path)
    for exp in self.apply():
        for fold in range(cf.folds_count):
            for stage in range(len(cf.stages)):
                log = exp.log_path(fold, stage)
                if not os.path.exists(log):
                    continue
                metrics = load_string(log)
                reports_dir = os.path.join(exp.path, "reports")
                ensure(reports_dir)
                out = os.path.join(
                    reports_dir,
                    "report-" + str(fold) + "." + str(stage) + ".html")
                save_string(out, template.replace("${metrics}", metrics))
def perform(self, server, reporter: ProgressMonitor):
    """Dump the model's predictions (and, when requested, the ground truth)
    as CSV files next to the experiment config.

    Returns the predictions path, or ``"<pred>::::<gt>"`` when both files
    were written.
    """
    exp: Experiment = server.experiment(self.experimentPath)
    spec = ModelSpec(**self.spec)
    cfg = exp.parse_config()
    model = spec.wrap(cfg, exp)
    preds = model.predictions(self.datasetName)
    # file-name suffix encodes the stages/folds the model was built from
    suffix = str(model.stages) + "." + str(model.folds)
    out_dir = os.path.join(os.path.dirname(cfg.path), "predictions")
    ensure(out_dir)
    pred_path = os.path.join(out_dir, self.datasetName + suffix + "-pr.csv")
    preds.dump(pred_path)
    if self.exportGroundTruth:
        gt_path = os.path.join(out_dir, self.datasetName + suffix + "-gt.csv")
        preds.dump(gt_path, encode_y=True)
        return pred_path + "::::" + gt_path
    return pred_path
def mark_loaded(root, url):
    """Record *url* in the list of downloaded dependencies stored in
    `<root>/.metadata/downloaded_deps.yaml`, creating the file on first use."""
    metadata_dir = os.path.join(root, ".metadata")
    utils.ensure(metadata_dir)
    fullPath = os.path.join(metadata_dir, "downloaded_deps.yaml")
    try:
        loaded_yaml = load_yaml(fullPath)
    except Exception:
        # file missing or unreadable on first run — start with an empty list.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
        loaded_yaml = {"dependencies": []}
    loaded_yaml["dependencies"].append(url)
    utils.save_yaml(fullPath, loaded_yaml)
def do_POST(self):
    """Handle a POST: when the path targets /zipfile, receive an uploaded
    project archive, unpack it into a clean temp folder and pick it up.

    NOTE(review): the 200 response is sent before the upload is processed,
    so the client cannot observe unpack failures — confirm this is intended.
    """
    self.send_response(200)
    self.end_headers()
    if "/zipfile" in self.path:
        # serialize project pickup against concurrently running tasks
        with self.server.task_manager.lock:
            destination = utils.temp_folder()
            # wipe any previous upload (ignore_errors=True)
            shutil.rmtree(destination, True)
            musket_utils.ensure(destination)
            zip_path = utils.stream_to_zip(self.rfile, self.headers, "file",
                                           destination)
            shutil.unpack_archive(zip_path, destination)
            os.remove(zip_path)
            self.pickup_project()
def get_results(host, project, task_id):
    """Download the delta archive for *project* from *host*, unpack it into a
    temp folder and copy the delta files back over the local project tree.

    NOTE(review): `task_id` is accepted but unused — kept for caller
    compatibility.
    """
    url = host + "/download_delta?project_id=" + os.path.basename(project)
    response = requests.get(url, stream=True)
    # Content-Length may be absent (chunked responses); default to 0 instead
    # of raising TypeError on int(None)
    size = int(response.headers.get("Content-Length", 0))
    destination = os.path.expanduser("~/.musket_core/delta_zip_download")
    if os.path.exists(destination):
        shutil.rmtree(destination)
    utils.ensure(destination)
    zip_name = os.path.join(destination, "project")
    # context managers guarantee the file and progress bar are closed
    with open(zip_name + ".zip", "wb") as f, tqdm.tqdm(total=size) as pbar:
        for item in response.iter_content(1024):
            f.write(item)
            pbar.update(1024)
    if os.path.exists(zip_name + ".zip"):
        shutil.unpack_archive(zip_name + ".zip", os.path.dirname(zip_name),
                              "zip")
        os.remove(zip_name + ".zip")
    delta_list = delta_files(destination)
    for item in delta_list:
        rel_path = os.path.relpath(item, destination)
        src = os.path.join(destination, rel_path)
        dst = os.path.join(project, rel_path)
        utils.ensure(os.path.dirname(dst))
        if os.path.exists(dst):
            os.remove(dst)
        shutil.copy(src, dst)
def __init__(self, root, id_set=None, parameters=None):
    """Create the object rooted at `<root>/<id_set[0]>_<id_set[1]>`.

    `id_set` must contain at least two elements. `parameters` defaults to a
    fresh empty dict per instance — the original used mutable default
    arguments, so the default dict was shared across every instance.
    """
    if id_set is None:
        # the original default [] would raise IndexError below anyway
        id_set = []
    self.root = os.path.join(root, id_set[0] + "_" + id_set[1])
    self.parameters = {} if parameters is None else parameters
    ensure(self.root)
def create(self, d: datasets.DataSet, path):
    """Build a Visualizer for dataset *d*, ensuring *path* exists first."""
    utils.ensure(path)
    visualizer = visualization.Visualizer(self.func, path, d)
    return visualizer
def _attach_visualizer(self, visualizer, dataset) -> visualization.Visualizer:
    """Create a visualizer instance bound to *dataset*, with its output
    folder at `<path>/visualizations/<visualizer.name>/<dataset.name>`.
    """
    # local renamed: the original `visualization` shadowed the imported module
    vis_dir = os.path.join(self.path, "visualizations", visualizer.name,
                           dataset.name)
    utils.ensure(vis_dir)
    return visualizer.create(dataset, vis_dir)
def get_visualizer(self, name, datasetName) -> visualization.Visualizer:
    """Return the visualizer *name* attached to dataset *datasetName*, with
    its output folder at `<path>/visualizations/<name>/<datasetName>`.
    """
    # local renamed: the original `visualization` shadowed the imported module
    vis_dir = os.path.join(self.path, "visualizations", name, datasetName)
    utils.ensure(vis_dir)
    return self.element(name).create(self.get_dataset(datasetName), vis_dir)
def to_dataset(src, experiments, name, data=False):
    """Publish a project (or, with ``data=True``, just its `data` folder) as a
    Kaggle dataset, creating it or uploading a new version as appropriate.

    Aborts with a message when the kaggle package cannot be set up.
    """
    try:
        from kaggle.api.kaggle_api_extended import KaggleApi
        from kaggle.api_client import ApiClient
    except Exception:
        # kaggle raises at import time when credentials are missing, so this
        # must stay broader than ImportError (was a bare `except:`)
        print("Kaggle not found or user credentials not provided.")
        return
    api = KaggleApi(ApiClient())
    api.authenticate()
    dest = os.path.expanduser("~/.musket_core/proj_to_dataset")
    if os.path.exists(dest):
        shutil.rmtree(dest, ignore_errors=True)
    utils.ensure(dest)
    # refuse to publish trees containing zip files
    visit_tree(
        src, lambda path: utils.throw("zip files not allowed!")
        if path.lower().endswith(".zip") else ())
    if data:
        src = os.path.join(src, "data")
        dest = os.path.join(dest, "data")
        # BUG FIX: was `shutil.copytree(src, dst)` — `dst` was undefined
        # and raised NameError in this branch
        shutil.copytree(src, dest)
    else:
        utils.collect_project(src, dest, True, False, experiments)
    api.dataset_initialize(dest)
    metapath = os.path.join(dest, "dataset-metadata.json")
    with open(metapath, "r") as f:
        metadata = f.read()
    metadata = metadata.replace("INSERT_SLUG_HERE",
                                name).replace("INSERT_TITLE_HERE", name)
    with open(metapath, "w") as f:
        f.write(metadata)
    # renamed from `id` to avoid shadowing the builtin
    dataset_id = json.loads(metadata)["id"]
    # page through all of the user's datasets matching `name`
    sets = []
    page = 1
    resp = api.dataset_list(mine=True, search=name, page=page)
    while len(resp):
        sets += resp
        page += 1
        resp = api.dataset_list(mine=True, search=name, page=page)
    if dataset_id in [str(item) for item in sets]:
        api.dataset_create_version(dest,
                                   delete_old_versions=True,
                                   convert_to_csv=False,
                                   version_notes="new version",
                                   dir_mode="zip")
    else:
        api.dataset_create_new(dest, convert_to_csv=False, dir_mode="zip")
def __init__(self, task_manager):
    """Bind the workspace to its task manager and create the workspace root."""
    self.task_manager: tasks.TaskManager = task_manager
    self.root = os.path.expanduser("~/.musket_core/server_workspace")
    musket_utils.ensure(self.root)