def _generate_hysdsio_params(nb_name):  # private method
    """Build the hysds-io parameter entries for a notebook.

    Every parameter reported by ``papermill.inspect_notebook`` whose name does
    not start with ``hysds_`` becomes one dict with the keys ``name``,
    ``from`` (always ``'submitter'``), ``type`` and ``default``, plus
    ``enumerables`` for enum parameters and ``description`` when the
    parameter has non-empty help text.

    :param nb_name: str, path of the Jupyter notebook to inspect
    :return: List[Dict[str, <any>]]
    :raises RuntimeError: if a parameter's default value cannot be decoded
        with ``json.loads``
    """
    nb_params = papermill.inspect_notebook(nb_name)

    params = []
    for name, info in nb_params.items():
        # Names with the hysds_ prefix are not exposed as hysds-io params.
        if name.startswith('hysds_'):
            continue

        param_type = info['inferred_type_name']
        description = info['help']

        hysdsio_param = {
            'name': name,
            'from': 'submitter',
            'type': _get_hysdsio_param_type(param_type)
        }

        if param_type.lower() != 'enum':
            default_value = info['default']
        else:
            # Enum parameters carry both the allowed values and the default.
            enums, default_value = _extract_enumerable_values(info)
            hysdsio_param['enumerables'] = enums

        if description:
            hysdsio_param['description'] = description

        print("default_value: ", default_value)
        try:
            hysdsio_param['default'] = json.loads(default_value)
        except Exception as e:
            print(e)
            # Chain explicitly so the decoding error is reported as the cause.
            raise RuntimeError(
                "make sure your parameter follows JSON standards: {}".format(default_value)
            ) from e

        params.append(hysdsio_param)
    return params
def _build_notebook_params(nb, ctx):
    """Collect the notebook parameters that have a value in the context.

    Parameter names come from ``papermill.inspect_notebook``. Names starting
    with ``hysds_`` are ignored, and so are names whose value in ``ctx`` is
    missing or ``None``.

    :param nb: path of the Jupyter notebook to inspect
    :param ctx: mapping of parameter name to value (the original comment
        refers to it as the contents of _context.json)
    :return: Dict[str, <any>] of parameter name to the value found in ``ctx``
    """
    return {
        name: ctx[name]
        for name in papermill.inspect_notebook(nb)
        if not name.startswith('hysds_') and ctx.get(name) is not None
    }
def infer_notebook_params(notebook_path) -> List[Tuple[str, OrderedDict]]:
    """
    Pair each parameter of a notebook with its processed properties.

    The parameters are read with ``inspect_notebook`` and the properties of
    each one are passed through ``infer_required`` before being returned.

    :param notebook_path: path of the notebook to inspect
    :return: list of ``(name, properties)`` tuples, in the order reported by
        ``inspect_notebook``
    """
    inferred = []
    for param_name, param_props in inspect_notebook(notebook_path).items():
        inferred.append((param_name, infer_required(param_props)))
    return inferred
def generate_job_spec(time_limit=__DEFAULT_TIME_LIMIT, soft_time_limit=__DEFAULT_SOFT_TIME_LIMIT,
                      disk_usage=__DEFAULT_DISK_USAGE, required_queue=None, nb=None, command=None):
    """
    Assemble the job-spec dictionary for a notebook.

    Shape of the result:
    {
        "required_queues":["system-jobs-queue"],
        "command":"/path/to/papermill_wrapper.py $HOME/hello_world.ipynb",
        "disk_usage":"3GB",
        "soft_time_limit": 86400,
        "time_limit": 86700,
        "params" : [...]
    }

    Each notebook parameter whose name does not start with ``hysds_`` is
    listed under ``params`` with destination ``context``. When ``command`` is
    not given, a ``notebook-pge-wrapper execute`` command is built from the
    last component of the current working directory and ``nb``.

    :param time_limit: int
    :param soft_time_limit: int
    :param disk_usage: str (KB, MB, GB) ex. 10GB
    :param required_queue: str or List[str]; a single str is wrapped in a list
    :param nb: str, path of Jupyter notebook
    :param command: str, command field in job_specs json
    :return: Dict[str, <any>]
    :raises RuntimeError: if ``required_queue`` is None or ``nb`` is empty
    """
    if required_queue is None:
        raise RuntimeError("required_queue not provided")
    if not nb:
        raise RuntimeError("Jupyter notebook not supplied")

    queues = [required_queue] if isinstance(required_queue, str) else required_queue

    context_params = [
        {'name': param_name, 'destination': 'context'}
        for param_name in papermill.inspect_notebook(nb)
        if not param_name.startswith('hysds_')
    ]

    # The notebook path is taken relative to the current directory's name.
    repo_name = os.getcwd().split('/')[-1]
    notebook_location = os.path.join(repo_name, nb)

    return {
        'command': command or 'notebook-pge-wrapper execute $HOME/%s' % notebook_location,
        'time_limit': time_limit,
        'soft_time_limit': soft_time_limit,
        'disk_usage': disk_usage,
        'required_queues': queues,
        'imported_worker_files': {
            "$HOME/.aws": "/home/ops/.aws"
        },
        'params': context_params
    }
def extract_hysds_specs(nb_name):
    """Extract the ``hysds_``-prefixed parameters of a notebook.

    Only parameters whose name starts with ``hysds_`` are considered; the
    prefix is removed to form the key in the result. Each default value is
    decoded with ``json.loads``. If decoding fails, the error is printed and
    the default with its first and last character removed is stored instead
    (presumably to strip the quotes of a non-JSON string literal such as
    ``'value'`` -- confirm against the notebooks in use).

    :param nb_name: str, path of the Jupyter notebook to inspect
    :return: Dict[str, <any>] mapping the un-prefixed name to its value
    """
    prefix = 'hysds_'
    nb_params = papermill.inspect_notebook(nb_name)

    hysds_specs = {}
    for name, info in nb_params.items():
        if not name.startswith(prefix):
            continue

        # Remove only the leading prefix; str.replace would also delete any
        # later occurrence of "hysds_" inside the name.
        spec_name = name[len(prefix):]
        default_value = info['default']
        try:
            hysds_specs[spec_name] = json.loads(default_value)
        except Exception as e:
            print(default_value, e)
            traceback.print_exc()
            # Record the fallback in the result. It used to be written back
            # into the inspected parameter, which is discarded, so the spec
            # was silently missing from the returned dict.
            hysds_specs[spec_name] = default_value[1:-1]
    return hysds_specs
async def _get_templates(self) -> tp.List[tp.Dict]:
    """Get the templates list.

    The git repository is updated first, then every ``*.ipynb`` file found
    recursively under ``report_root_path / report_path`` is inspected for
    its parameters. A notebook whose parameters cannot be read is still
    listed, with an empty parameter list, and a warning is logged.

    A template object is defined as:

    @dataclass
    class Template():
        path: str
        parameters: List[Parameter]

    Returns:
        List[Template]
            List of templates; ``path`` is relative to the template
            directory
    Raises:
        CalledProcessError: If the git repository cannot be updated.
    """
    await update_git_repository(
        self.report_root_path, self.report_path, self.report_git_url
    )

    template_dir = Path(self.report_root_path) / self.report_path

    templates = []
    for report in template_dir.glob("**/*.ipynb"):
        parameters = {}
        try:
            parameters = pm.inspect_notebook(str(report))
        except Exception:
            # Best effort: an unreadable notebook must not hide the others.
            # Only Exception is caught so that KeyboardInterrupt, SystemExit
            # and task cancellation still propagate.
            self.log.warning(
                f"Unable to get the parameters for notebook '{report!s}'.",
                exc_info=True,
            )
        templates.append(
            {
                "path": str(report.relative_to(template_dir)),
                # Convert to dict to avoid OrderedDict as parameter object
                "parameters": [dict(v) for v in parameters.values()],
            }
        )
    return templates
def notebook_add_metadata(target_notebook):
    """Tag a notebook's ``#@param`` cell as the parameters cell, in place.

    The notebook JSON is loaded, the first cell whose source contains
    ``#@param`` gets its ``tags`` metadata set to ``['parameters']``
    (replacing any existing tags), the kernelspec language is set to
    ``python``, and the file is rewritten.

    :param target_notebook: path of the notebook file to modify
    :raises StopIteration: if no cell contains ``#@param``
    :raises KeyError: if the notebook has no ``metadata.kernelspec`` entry or
        the matching cell has no ``metadata`` entry
    """
    with open(target_notebook, "r", encoding="utf-8") as f:
        j = json.load(f)

    def _source_text(cell):
        # A cell's source may be one string or a list of lines; joining a
        # plain string would interleave newlines between its characters and
        # make the "#@param" marker impossible to find.
        source = cell["source"]
        return source if isinstance(source, str) else "\n".join(source)

    param_cell = next(cell for cell in j['cells'] if "#@param" in _source_text(cell))
    print("Found parameter cell", param_cell)
    param_cell['metadata']['tags'] = ['parameters']
    j['metadata']['kernelspec']['language'] = 'python'

    with open(target_notebook, "w", encoding="utf-8") as f:
        #print("Writing modified notebook to", target_notebook)
        json.dump(j, f)


# Script entry: tag the notebook given as the first command-line argument,
# then print its inspected parameters as sorted, indented JSON.
notebook_path = sys.argv[1]
notebook_add_metadata(notebook_path)
parameters = pm.inspect_notebook(notebook_path)
print(json.dumps(parameters,sort_keys=True, indent=4))
# ipfs_root = sys.argv[2]
# for key, value in parameters.items():
#     print(value["default"], file=open(f"{ipfs_root}/input/{key}", 'w'))