Exemple #1
0
def _generate_hysdsio_params(nb_name):  # private method
    """
    Build the list of hysds-io parameter entries for a notebook.

    Every parameter reported by ``papermill.inspect_notebook`` whose name does
    not start with ``hysds_`` becomes one dict with the keys ``name``,
    ``from`` (always ``'submitter'``), ``type``, optionally ``enumerables``
    and ``description``, and finally ``default``.

    :param nb_name: notebook handed to ``papermill.inspect_notebook``
    :return: list of parameter dicts, in the order papermill reported them
    :raises RuntimeError: when a parameter's default value cannot be parsed
        by ``json.loads``
    """
    specs = []

    for name, info in papermill.inspect_notebook(nb_name).items():
        # hysds_* entries are skipped here; they are not submitter inputs.
        if name.startswith('hysds_'):
            continue

        inferred_type = info['inferred_type_name']
        help_text = info['help']

        entry = {'name': name, 'from': 'submitter'}
        entry['type'] = _get_hysdsio_param_type(inferred_type)

        if inferred_type.lower() == 'enum':
            # Enum parameters carry their allowed values and their default
            # together; the helper separates the two.
            choices, raw_default = _extract_enumerable_values(info)
            entry['enumerables'] = choices
        else:
            raw_default = info['default']

        if help_text:
            entry['description'] = help_text

        print("default_value: ", raw_default)
        try:
            parsed_default = json.loads(raw_default)
        except Exception as e:
            print(e)
            raise RuntimeError("make sure your parameter follows JSON standards: {}".format(raw_default))

        entry['default'] = parsed_default
        specs.append(entry)

    return specs
Exemple #2
0
def _build_notebook_params(nb, ctx):
    """
    Select the values from ``ctx`` that correspond to the notebook's parameters.

    A parameter reported by ``papermill.inspect_notebook`` is included when
    its name does not start with ``hysds_`` and ``ctx`` holds a non-``None``
    value for it (i.e. the key is present in _context.json).

    :param nb: notebook handed to ``papermill.inspect_notebook``
    :param ctx: mapping of context values, looked up by parameter name
    :return: dict of parameter name -> value taken from ``ctx``
    """
    declared = papermill.inspect_notebook(nb)
    return {
        name: ctx[name]
        for name in declared
        if not name.startswith('hysds_') and ctx.get(name) is not None
    }
Exemple #3
0
def infer_notebook_params(notebook_path) -> List[Tuple[str, OrderedDict]]:
    """
    Inspect a notebook and pair each parameter name with its processed properties.

    Each properties mapping returned by ``inspect_notebook`` is passed through
    ``infer_required`` before being paired with the parameter's name.

    :param notebook_path: notebook handed to ``inspect_notebook``
    :return: list of ``(name, infer_required(properties))`` tuples, in the
        order ``inspect_notebook`` yields the parameters
    """
    inferred = []
    for param_name, param_props in inspect_notebook(notebook_path).items():
        inferred.append((param_name, infer_required(param_props)))
    return inferred
Exemple #4
0
def generate_job_spec(time_limit=__DEFAULT_TIME_LIMIT, soft_time_limit=__DEFAULT_SOFT_TIME_LIMIT,
                      disk_usage=__DEFAULT_DISK_USAGE, required_queue=None, nb=None, command=None):
    """
    Assemble the job-spec dictionary for a notebook.

    example: {
        "required_queues":["system-jobs-queue"],
        "command":"/path/to/papermill_wrapper.py $HOME/hello_world.ipynb",
        "disk_usage":"3GB",
        "soft_time_limit": 86400,
        "time_limit": 86700,
        "params" : [...]
    }

    The ``params`` list holds one ``{'name': ..., 'destination': 'context'}``
    entry for every notebook parameter whose name does not start with
    ``hysds_``. When ``command`` is not given, the command runs
    ``notebook-pge-wrapper execute`` on ``$HOME/<cwd name>/<nb>``, where
    ``<cwd name>`` is the last ``/``-separated component of the current
    working directory.

    :param time_limit: int
    :param soft_time_limit: int
    :param disk_usage: str (KB, MB, GB) ex. 10GB
    :param required_queue: str or List[str]; a single str is wrapped in a list
    :param nb: str, path of Jupyter notebook
    :param command: str, command field in job_specs json
    :return: Dict[str, <any>]
    :raises RuntimeError: if ``required_queue`` is None or ``nb`` is empty
    """
    if required_queue is None:
        raise RuntimeError("required_queue not provided")
    if not nb:
        raise RuntimeError("Jupyter notebook not supplied")

    queues = [required_queue] if isinstance(required_queue, str) else required_queue

    context_params = [
        {'name': param_name, 'destination': 'context'}
        for param_name in papermill.inspect_notebook(nb)
        if not param_name.startswith('hysds_')
    ]

    # The notebook is addressed relative to $HOME as <cwd name>/<nb>.
    cwd_name = os.getcwd().split('/')[-1]
    notebook_rel_path = os.path.join(cwd_name, nb)
    if not command:
        command = 'notebook-pge-wrapper execute $HOME/%s' % notebook_rel_path

    job_spec = {'command': command}
    job_spec['time_limit'] = time_limit
    job_spec['soft_time_limit'] = soft_time_limit
    job_spec['disk_usage'] = disk_usage
    job_spec['required_queues'] = queues
    job_spec['imported_worker_files'] = {"$HOME/.aws": "/home/ops/.aws"}
    job_spec['params'] = context_params
    return job_spec
Exemple #5
0
def extract_hysds_specs(nb_name):
    """
    Collect the notebook's ``hysds_``-prefixed parameters into a dict.

    Each parameter reported by ``papermill.inspect_notebook`` whose name
    starts with ``hysds_`` is stored under its name with that leading prefix
    removed. The parameter's default is parsed with ``json.loads``; if that
    fails and the default is a quoted string (e.g. a single-quoted Python
    literal, which is not valid JSON), the text between the quotes is stored
    instead. Defaults that can be neither parsed nor unquoted are reported on
    stdout and left out of the result.

    :param nb_name: notebook handed to ``papermill.inspect_notebook``
    :return: dict mapping the un-prefixed parameter name to its value
    """
    prefix = 'hysds_'
    nb_params = papermill.inspect_notebook(nb_name)

    hysds_specs = {}
    for k, p in nb_params.items():
        if not k.startswith(prefix):
            continue

        # Remove only the leading prefix; str.replace would also delete any
        # later "hysds_" occurring inside the parameter name.
        spec_name = k[len(prefix):]
        default_value = p['default']
        try:
            hysds_specs[spec_name] = json.loads(default_value)
        except Exception as e:
            print(default_value, e)
            traceback.print_exc()
            # Fall back to the raw text of a quoted string. The result must
            # go into hysds_specs: writing it back into `p` would silently
            # drop the parameter from the returned dict.
            is_quoted = (
                isinstance(default_value, str)
                and len(default_value) >= 2
                and default_value[0] == default_value[-1]
                and default_value[0] in ('"', "'")
            )
            if is_quoted:
                hysds_specs[spec_name] = default_value[1:-1]
    return hysds_specs
Exemple #6
0
    async def _get_templates(self) -> tp.List[tp.Dict]:
        """Get the templates list.

        The report repository is updated first, then every ``*.ipynb`` file
        found recursively under ``report_root_path / report_path`` is listed
        together with the parameters ``pm.inspect_notebook`` reports for it.
        A notebook whose parameters cannot be read is still listed, with an
        empty parameter list, and a warning is logged.

        A template object is defined as:

            @dataclass
            class Template():
                path: str
                parameters: List[Parameter]

        Returns:
            List[Template] List of templates; ``path`` is relative to the
            template directory.

        Raises:
            CalledProcessError: If the git repository cannot be updated
                (raised by ``update_git_repository``).
        """
        await update_git_repository(
            self.report_root_path, self.report_path, self.report_git_url
        )
        template_dir = Path(self.report_root_path) / self.report_path

        templates = []
        for report in template_dir.glob("**/*.ipynb"):
            parameters = {}
            try:
                parameters = pm.inspect_notebook(str(report))
            except Exception:
                # Best effort: one unreadable notebook must not hide the
                # others. Only Exception is caught so that KeyboardInterrupt
                # and SystemExit still propagate instead of being logged
                # and swallowed.
                self.log.warning(
                    f"Unable to get the parameters for notebook '{report!s}'.",
                    exc_info=True,
                )
            templates.append(
                {
                    "path": str(report.relative_to(template_dir)),
                    # Convert to dict to avoid OrderedDict as parameter object
                    "parameters": [dict(v) for v in parameters.values()],
                }
            )

        return templates
Exemple #7
0
def notebook_add_metadata(target_notebook):
  """Tag the notebook's ``#@param`` cell as the ``parameters`` cell, in place.

  The notebook JSON is loaded, the first cell whose source contains
  ``#@param`` gets its metadata tags set to ``['parameters']`` (replacing any
  existing tags), the kernelspec language is set to ``'python'``, and the
  result is written back to the same file.

  :param target_notebook: path of the notebook file to rewrite
  :raises StopIteration: if no cell contains ``#@param``
  """
  with open(target_notebook, "r", encoding="utf-8") as f:
    j = json.load(f)

  def _source_text(cell):
    # A cell's source may be a single string or a list of lines. Joining a
    # plain string with "\n" would interleave newlines between its
    # characters and hide the marker, so only lists are joined.
    source = cell.get("source", "")
    return source if isinstance(source, str) else "\n".join(source)

  param_cell = next(cell for cell in j['cells'] if "#@param" in _source_text(cell))

  print("Found parameter cell", param_cell)
  param_cell.setdefault('metadata', {})['tags'] = ['parameters']

  # Create the metadata/kernelspec containers when the notebook lacks them
  # rather than failing with KeyError.
  j.setdefault('metadata', {}).setdefault('kernelspec', {})['language'] = 'python'

  with open(target_notebook, "w", encoding="utf-8") as f:
    json.dump(j, f)

# Script entry: the notebook to process is the first command-line argument.
notebook_path = sys.argv[1]


# Rewrite the notebook on disk so its "#@param" cell carries the
# 'parameters' tag before it is inspected.
notebook_add_metadata(notebook_path)

# Inspect the tagged notebook (presumably papermill imported as `pm` — confirm
# against the file's imports) and print its parameters to stdout as
# key-sorted, 4-space-indented JSON.
parameters = pm.inspect_notebook(notebook_path)
print(json.dumps(parameters,sort_keys=True, indent=4))

# ipfs_root = sys.argv[2]
# for key, value in parameters.items():
#     print(value["default"], file=open(f"{ipfs_root}/input/{key}", 'w'))