Beispiel #1
0
def write_template(output_path, template_path: str, **kwargs):
    """
        Write an executable output file using Jinja template.

        The template is read and rendered before the output file is opened,
        so a rendering failure never truncates an already existing output.

        :param output_path: path of the file to generate (mode set to 0o755)
        :param template_path: path of the template to render
        :param kwargs: variables forwarded to the template rendering
        :raises MlVToolException: if the template cannot be read, the output
            cannot be written or the template cannot be rendered
    """
    logging.info(
        f'Write command {output_path} using template {basename(template_path)}'
    )
    try:
        with open(template_path, 'r') as template_fd:
            content = render_string_template(template_fd.read(), **kwargs)
        # dirname() is '' for a bare file name and makedirs('') raises,
        # so the parent directory is only created when there is one.
        output_dir = dirname(output_path)
        if output_dir:
            makedirs(output_dir, exist_ok=True)
        with open(output_path, 'w') as fd:
            fd.write(content)
        chmod(output_path, 0o755)
    except IOError as e:
        raise MlVToolException(
            f'Cannot create executable {output_path} using template {template_path}'
        ) from e
    except UndefinedError as e:
        raise MlVToolException(
            f'Cannot render {output_path} using template {template_path} due to undefined '
            f'variable: {e}') from e
    except TemplateError as e:
        raise MlVToolException(
            f'Cannot render {output_path} using template {template_path}'
        ) from e
 def from_meta(args: List[str], description: str) -> 'DocstringDvcExtra':
     """
         Build a DocstringDvcExtra from a docstring meta entry.

         :param args: meta arguments; must hold only DocstringDvcExtra.DVC_EXTRA_KEY
         :param description: meta description, passed to the constructor
         :raises MlVToolException: on a wrong argument count, an empty
             description or an unexpected key
     """
     is_well_formed = len(args) == 1 and bool(description)
     if not is_well_formed:
         message = (f'Docstring dvc-extra invalid syntax: {args}:{description}.'
                    f'Expected :dvc-extra: {{python_other_param}}')
         raise MlVToolException(message)

     if args[0] != DocstringDvcExtra.DVC_EXTRA_KEY:
         message = f'Receive bad parameter for {DocstringDvcExtra.DVC_EXTRA_KEY} {args[0]}'
         raise MlVToolException(message)

     return DocstringDvcExtra(description)
 def from_meta(args: List[str], description: str) -> 'DocstringDvcCommand':
     """
         Build a DocstringDvcCommand from a docstring meta entry.

         :param args: meta arguments; must hold only DocstringDvcCommand.DVC_CMD_KEY
         :param description: meta description, passed to the constructor
         :raises MlVToolException: on a wrong argument count, an empty
             description or an unexpected key
     """
     is_well_formed = len(args) == 1 and bool(description)
     if not is_well_formed:
         message = (f'Docstring dvc-cmd invalid syntax: {args}:{description}.'
                    f'Expected :dvc-cmd: {{dvc_command}}')
         raise MlVToolException(message)

     if args[0] != DocstringDvcCommand.DVC_CMD_KEY:
         message = f'Receive bad parameter for {DocstringDvcCommand.DVC_CMD_KEY} {args[0]}'
         raise MlVToolException(message)

     return DocstringDvcCommand(description)
Beispiel #4
0
def get_ast(content: str, name: str = 'undefined'):
    """
        Parse python source code and return its ast tree.

        :param content: python source code
        :param name: file name reported to the parser and used in error messages
        :raises MlVToolException: if the content cannot be parsed
    """
    try:
        tree = ast.parse(content, filename=name)
    except SyntaxError as syntax_error:
        message = f'Invalid python format for file {name}: {syntax_error}'
        raise MlVToolException(message) from syntax_error
    except Exception as error:
        # Any other parsing failure is reported as a tool error as well
        message = f'Cannot extract ast tree{f" {name}" if name else ""}: {error}'
        raise MlVToolException(message) from error
    return tree
 def from_meta(args: List[str], description: str) -> 'DocstringDvcMetaFile':
     """
         Build a DocstringDvcMetaFile from a docstring meta entry.

         The '.dvc' suffix is appended to the description when it is missing.

         :param args: meta arguments; must hold only DocstringDvcMetaFile.DVC_META_FILE_KEY
         :param description: meta file name, with or without the '.dvc' suffix
         :raises MlVToolException: on a wrong argument count, an empty
             description or an unexpected key
     """
     is_well_formed = len(args) == 1 and bool(description)
     if not is_well_formed:
         message = (f'Docstring dvc-meta-file invalid syntax: {args}:{description}.'
                    f'Expected :dvc-meta-file: {{meta_file_name}}')
         raise MlVToolException(message)

     if args[0] != DocstringDvcMetaFile.DVC_META_FILE_KEY:
         message = f'Receive bad parameter for {DocstringDvcMetaFile.DVC_META_FILE_KEY} {args[0]}'
         raise MlVToolException(message)

     if not description.endswith('.dvc'):
         description = f'{description}.dvc'
     return DocstringDvcMetaFile(description)
Beispiel #6
0
def get_dvc_params(docstring: Docstring) -> DvcParams:
    """
        Return a set of dvc docstring parameters
        (dvc dependencies, outputs, extra parameters or whole command)

        Meta entries without arguments, or whose first argument is not a
        known dvc key, are ignored. When several meta file entries are
        declared, the last one is kept.

        :param docstring: parsed docstring providing `params` and `meta`
        :return: DvcParams built from the collected entries; the command and
            the meta file name are '' when not declared
        :raises MlVToolException: if the dvc command is declared more than
            once, or together with in/out/out-persist/extra parameters
    """
    dvc_in = []
    dvc_out = []
    dvc_out_persist = []
    dvc_extra = []
    dvc_cmd = []
    params = {param.arg_name: param.type_name for param in docstring.params}
    dvc_meta = None
    for meta in docstring.meta:
        if not meta.args:
            continue
        key = meta.args[0]
        if key == DocstringDvcIn.DVC_IN_KEY:
            dvc_in.append(
                DocstringDvcIn.from_meta(params, meta.args, meta.description))
        elif key == DocstringDvcOut.DVC_OUT_KEY:
            dvc_out.append(
                DocstringDvcOut.from_meta(params, meta.args, meta.description))
        elif key == DocstringDvcOutPersist.DVC_OUT_PERSIST_KEY:
            dvc_out_persist.append(
                DocstringDvcOutPersist.from_meta(params, meta.args,
                                                 meta.description))
        elif key == DocstringDvcExtra.DVC_EXTRA_KEY:
            dvc_extra.append(
                DocstringDvcExtra.from_meta(meta.args, meta.description))
        elif key == DocstringDvcMetaFile.DVC_META_FILE_KEY:
            dvc_meta = DocstringDvcMetaFile.from_meta(meta.args,
                                                      meta.description)
        elif key == DocstringDvcCommand.DVC_CMD_KEY:
            dvc_cmd.append(
                DocstringDvcCommand.from_meta(meta.args, meta.description))
    if len(dvc_cmd) > 1:
        raise MlVToolException(
            f'Only one occurence of {DocstringDvcCommand.DVC_CMD_KEY} is allowed'
        )
    # Persistent outputs are part of the exclusive parameters listed in the
    # error message, so they must be rejected like the other ones.
    if dvc_cmd and (dvc_in or dvc_out or dvc_out_persist or dvc_extra):
        raise MlVToolException(
            f'Dvc command {DocstringDvcCommand.DVC_CMD_KEY} is exclusive with other dvc parameters '
            f'[{DocstringDvcExtra.DVC_EXTRA_KEY}, {DocstringDvcIn.DVC_IN_KEY}, '
            f'{DocstringDvcOut.DVC_OUT_KEY}, {DocstringDvcOutPersist.DVC_OUT_PERSIST_KEY}]'
        )

    return DvcParams(dvc_in, dvc_out, dvc_out_persist, dvc_extra,
                     dvc_cmd[0] if dvc_cmd else '',
                     dvc_meta.file_name if dvc_meta else '')
Beispiel #7
0
    def run(self, *args, **kwargs):
        """
            Convert a notebook to a python script, then generate the dvc
            command of that script.

            Both output paths are deduced from the configuration, so a
            configuration with a path is mandatory.

            :param args: command line arguments handed to the argument parser
            :raises MlVToolException: if the configuration has no path
        """
        parsed = (
            ArgumentBuilder(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                            description='Convert Notebook to python script')
            .add_work_dir_argument()
            .add_conf_path_argument()
            .add_docstring_conf()
            .add_force_argument()
            .add_argument('-n', '--notebook', type=str, required=True,
                          help='The notebook to convert')
            .parse(args)
        )
        self.set_log_level(parsed)

        conf = self.get_conf(parsed.working_directory, parsed.notebook,
                             parsed.conf_path)
        if not conf.path:
            raise MlVToolException('Configuration file is mandatory')

        # The command line value takes precedence over the configuration one
        docstring_conf_path = parsed.docstring_conf or conf.docstring_conf
        docstring_conf = None
        if docstring_conf_path:
            docstring_conf = load_docstring_conf(docstring_conf_path)

        output_script = get_script_output_path(parsed.notebook, conf)
        out_dvc_cmd = get_dvc_cmd_output_path(output_script, conf)
        self.check_force(parsed.force, [output_script, out_dvc_cmd])

        export_to_script(parsed.notebook, output_script, conf)
        gen_dvc_command(output_script, out_dvc_cmd, conf, docstring_conf)
Beispiel #8
0
def write_python_script(script_content: str, output_path: str):
    """
        Write Python 3 generated code into an executable file
        - use yapf for code format

        :param script_content: generated python code to format and write
        :param output_path: path of the script to write (mode set to 0o755)
        :raises MlVToolException: if the content cannot be formatted or the
            file cannot be written
    """
    try:
        # dirname() is '' for a bare file name and makedirs('') raises,
        # so the parent directory is only created when there is one.
        output_dir = dirname(output_path)
        if output_dir:
            makedirs(output_dir, exist_ok=True)
        formatted_script = FormatCode(script_content, style_config=f'{{ based_on_style: pep8, '
                                                                   f'column_limit: {MAX_LINE_LENGTH} }}')
        with open(output_path, 'w') as fd:
            # Only the first item of the FormatCode result is the code
            fd.write(formatted_script[0])
        chmod(output_path, 0o755)
    except SyntaxError as e:
        raise MlVToolException(f'Cannot write generated Python, content is wrongly formatted: {script_content}') from e
    except IOError as e:
        raise MlVToolException(f'Cannot write generated Python script {output_path}') from e
Beispiel #9
0
    def run(self, *args, **kwargs):
        """
            Run the consistency check between every notebook of a directory
            and its associated script.

            Script paths are deduced from the configuration, so a
            configuration with a path is mandatory. Ignored notebooks are
            matched on their base name. Once all notebooks are checked the
            process exits with status 0 if every check passed, 1 otherwise.

            :param args: command line arguments handed to the argument parser
            :raises MlVToolException: if the configuration has no path
        """
        description = ('Checks all notebooks and scripts consistency.\n'
                       'Run the up to date checks on all notebooks from the notebook directory. '
                       'Script names are deduce from the conf.')
        parsed = (
            ArgumentBuilder(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                            description=description)
            .add_work_dir_argument()
            .add_conf_path_argument()
            .add_path_argument('-n', '--notebooks-dir', type=str, help='Notebooks directory')
            .add_argument('-i', '--ignore', action='append', help='Notebook filename to ignore', default=[])
            .parse(args)
        )

        self.set_log_level(parsed)
        conf = self.get_conf(parsed.working_directory, parsed.notebooks_dir,
                             parsed.conf_path)
        if not conf.path:
            raise MlVToolException('Configuration file is mandatory')

        all_consistent = True
        for notebook in glob.glob(join(parsed.notebooks_dir, '*.ipynb')):
            if basename(notebook) in parsed.ignore:
                logging.info(f'Ignore notebook {notebook}')
                continue

            associated_script = get_script_output_path(notebook, conf)
            # Every notebook is checked, even after a first failure
            if not run_consistency_check(notebook, associated_script, conf):
                all_consistent = False
        sys.exit(0 if all_consistent else 1)
Beispiel #10
0
    def run(self, *args, **kwargs):
        """
            Generate the bash dvc command of a python script.

            The output path comes from --out-dvc-cmd when provided, otherwise
            it is deduced from the configuration.

            :param args: command line arguments handed to the argument parser
            :raises MlVToolException: if neither a configuration path nor
                --out-dvc-cmd is available
        """
        parsed = (
            ArgumentBuilder(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                            description='Generate python script wrappers')
            .add_work_dir_argument()
            .add_conf_path_argument()
            .add_force_argument()
            .add_docstring_conf()
            .add_path_argument('-i', '--input-script', type=str, required=True,
                               help='The python input script')
            .add_path_argument('-o', '--out-dvc-cmd', type=str,
                               help='Path to the generated bash dvc command')
            .parse(args)
        )

        self.set_log_level(parsed)
        conf = self.get_conf(parsed.working_directory, parsed.input_script,
                             parsed.conf_path)
        # The command line value takes precedence over the configuration one
        docstring_conf_path = parsed.docstring_conf or conf.docstring_conf

        if not (conf.path or parsed.out_dvc_cmd):
            raise MlVToolException(
                'Parameter --out-dvc-cmd is mandatory if no conf provided')

        docstring_conf = None
        if docstring_conf_path:
            docstring_conf = load_docstring_conf(docstring_conf_path)

        out_dvc_cmd = parsed.out_dvc_cmd
        if not out_dvc_cmd:
            out_dvc_cmd = get_dvc_cmd_output_path(parsed.input_script, conf)

        self.check_force(parsed.force, [out_dvc_cmd])
        gen_dvc_command(parsed.input_script, out_dvc_cmd, conf, docstring_conf)
Beispiel #11
0
 def check_force(self, force: bool, outputs: List[str]):
     """
         Refuse to overwrite existing outputs unless force is set.

         :param force: when true, no check is done
         :param outputs: paths about to be written
         :raises MlVToolException: for the first output which already exists
     """
     if force:
         return
     already_there = next((path for path in outputs if exists(path)), None)
     if already_there is not None:
         raise MlVToolException(f'Output file {already_there} already exists, '
                                f'use --force option to overwrite it')
def get_dvc_meta(dvc_meta_file: str) -> DvcMeta:
    """
        Get DVC meta from a DVC meta file

        :param dvc_meta_file: path of the YAML DVC meta file
        :return: DvcMeta built from the file base name, its 'cmd' value
            ('' when absent) and the 'path' of each 'deps' and 'outs' entry
        :raises MlVToolException: if the file cannot be read or its content
            does not have the expected structure
    """
    logging.debug(f'Get DVC meta from {dvc_meta_file}')
    try:
        with open(dvc_meta_file, 'r') as fd:
            # yaml.load() without an explicit Loader can build arbitrary
            # objects and is rejected by PyYAML >= 6; the meta file only
            # needs plain YAML types, so the safe loader is used.
            raw_data = yaml.safe_load(fd)
        # 'or []' also covers a key declared with an empty value
        deps = [v['path'] for v in raw_data.get('deps') or []]
        outs = [v['path'] for v in raw_data.get('outs') or []]
        meta = DvcMeta(basename(dvc_meta_file), raw_data.get('cmd', ''), deps, outs)
        logging.debug(f'Meta for {dvc_meta_file}: {meta}')
        return meta
    except (yaml.error.YAMLError, AttributeError, KeyError, TypeError) as e:
        # AttributeError: top level is not a mapping (e.g. empty file);
        # KeyError/TypeError: a deps/outs entry without a 'path' key
        raise MlVToolException(f'Cannot load DVC meta file {dvc_meta_file}. Wrong format') from e
    except IOError as e:
        raise MlVToolException(f'Cannot load DVC meta file {dvc_meta_file}') from e
def resolve_docstring(docstring: str, docstring_conf: dict) -> str:
    """
        Render the docstring as a Jinja template, the user custom
        configuration being exposed to the template as `conf`.

        :param docstring: docstring template
        :param docstring_conf: user custom configuration
        :raises MlVToolException: if the template rendering fails
    """
    try:
        resolved = render_string_template(docstring, conf=docstring_conf)
    except jinja2.exceptions.TemplateError as template_error:
        message = f'Cannot resolve docstring using Jinja, {template_error}'
        raise MlVToolException(message) from template_error
    return resolved
Beispiel #14
0
def get_git_top_dir(cwd: str) -> str:
    """
        Return the top level directory of the git repository containing cwd.

        :param cwd: directory from which git is run; an empty value means
            the current working directory
        :raises MlVToolException: if the git command cannot be started or fails
    """
    try:
        # '' (dirname of a bare file name) is not a valid working directory
        # for subprocess, None makes it use the current one.
        output = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'],
                                         cwd=cwd or None)
    except (subprocess.SubprocessError, OSError) as e:
        # OSError covers a missing git executable or an invalid directory
        message = 'Can not run \'git rev-parse\' command to get top directory. Input files must belong ' \
                  'to a git repository.'
        logging.fatal(message)
        raise MlVToolException(message) from e
    return output.decode().strip('\n')
Beispiel #15
0
def get_ast_from_file(file_path: str):
    """
        Return the ast tree of the python code stored in the given file.

        :param file_path: path of the python file, also used as the ast file name
        :raises MlVToolException: if the file cannot be read
    """
    try:
        with open(file_path, 'r') as source_fd:
            source = source_fd.read()
            return get_ast(source, file_path)
    except IOError as io_error:
        message = f'Cannot read file {file_path} for ast tree extraction'
        raise MlVToolException(message) from io_error
def get_dvc_files(dvc_target_file: str) -> List[str]:
    """
        List the DVC meta files which may be steps of the targeted pipeline.

        All the '.dvc' files located in the directory of the target are
        returned.

        :param dvc_target_file: path of the targeted DVC meta file
        :raises MlVToolException: if the target does not exist
    """
    if exists(dvc_target_file):
        pipeline_dir = dirname(dvc_target_file)
        return glob.glob(join(pipeline_dir, '*.dvc'))
    raise MlVToolException(
        f'Targeted pipeline metadata step {dvc_target_file} does not exist'
    )
Beispiel #17
0
def extract_docstring_from_file(input_path: str,
                                docstring_conf: dict = None) -> DocstringInfo:
    """
        Extract docstring information from the first function definition
        found while walking the ast of a python script.

        When a docstring configuration is given, the docstring is resolved
        with it before being parsed.

        :param input_path: path of the python script
        :param docstring_conf: optional user configuration for the docstring
        :return: DocstringInfo holding the method name, the parsed docstring,
            its text and the input path
        :raises MlVToolException: if the script is not found, is not valid
            python or contains no function definition
    """
    logging.info(f'Extract docstring from "{input_path}".')
    try:
        with open(input_path, 'r') as fd:
            root = ast.parse(fd.read())
    except FileNotFoundError as e:
        raise MlVToolException(
            f'Python input script {input_path} not found.') from e
    except SyntaxError as e:
        raise MlVToolException(
            f'Invalid python script format: {input_path}') from e

    function_nodes = (node for node in ast.walk(root)
                      if isinstance(node, ast.FunctionDef))
    function_node = next(function_nodes, None)
    if function_node is None:
        logging.error(f'Not method found in {input_path}')
        raise MlVToolException(f'Not method found in {input_path}')

    method_name = function_node.name
    docstring_str = ast.get_docstring(function_node)
    if docstring_conf:
        docstring_str = resolve_docstring(docstring_str, docstring_conf)
    docstring = dc_parse(docstring_str)

    logging.debug(
        f'Docstring extracted from method {method_name}: {docstring_str}')
    return DocstringInfo(method_name=method_name,
                         docstring=docstring,
                         repr=docstring_str,
                         file_path=input_path)
Beispiel #18
0
def extract_docstring(cell_content: str) -> str:
    """
        Extract the module level docstring of a cell content.

        The result is whatever ast.get_docstring gives for the module node,
        or '' if the ast contains no module node.
    """
    logging.info('Extract docstring from cell content')
    logging.debug(f'Cell content {cell_content}')
    try:
        root = get_ast(cell_content)
    except SyntaxError as syntax_error:
        message = f'Invalid python cell format: {cell_content}'
        raise MlVToolException(message) from syntax_error

    module_nodes = (node for node in ast.walk(root)
                    if isinstance(node, ast.Module))
    module_node = next(module_nodes, None)
    if module_node is None:
        return ''
    return ast.get_docstring(module_node)
Beispiel #19
0
    def run(self, *args, **kwargs):
        """
            Export a DVC pipeline to a sequential python script.

            :param args: command line arguments handed to the argument parser
            :raises MlVToolException: if the output already exists and
                --force is not set
        """
        parsed = (
            ArgumentBuilder(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                            description='Export a DVC pipeline to sequential execution.')
            .add_force_argument()
            .add_work_dir_argument()
            .add_argument('--dvc', type=str, required=True, help='DVC targeted pipeline metadata step')
            .add_argument('-o', '--output', type=str, help='The Python pipeline script output path',
                          required=True)
            .parse(args)
        )

        self.set_log_level(parsed)
        work_dir = parsed.working_directory

        # An existing output is only overwritten on explicit request
        if not (parsed.force or not exists(parsed.output)):
            raise MlVToolException(
                f'Output file {parsed.output} already exists, use --force option to overwrite it'
            )

        export_pipeline(parsed.dvc, parsed.output, work_dir)
Beispiel #20
0
def get_converted_script(input_notebook_path: str, conf: MlVToolConf) -> str:
    """
        Convert a notebook with a PythonExporter and return the resulting
        python content.

        :param input_notebook_path: path of the notebook to convert
        :param conf: configuration providing the keys to ignore
        :raises MlVToolException: if the export fails for any reason
    """
    exporter = PythonExporter(get_config(TEMPLATE_PATH))
    jinja_filters = (
        ('filter_trailing_cells', filter_trailing_cells),
        ('get_formatted_cells', get_formatted_cells),
        ('get_data_from_docstring', get_data_from_docstring),
        ('sanitize_method_name', to_method_name),
    )
    for filter_name, filter_function in jinja_filters:
        exporter.register_filter(name=filter_name, jinja_filter=filter_function)

    resources = {'ignore_keys': conf.ignore_keys}
    logging.debug(f'Template info {resources}')
    try:
        exported = exporter.from_filename(input_notebook_path,
                                          resources=resources)
        script_content, _ = exported
    except Exception as error:
        raise MlVToolException(error) from error
    return script_content
Beispiel #21
0
    def run(self, *args, **kwargs):
        """
            Convert a notebook to a python script.

            The output path comes from --output when provided, otherwise it
            is deduced from the configuration.

            :param args: command line arguments handed to the argument parser
            :raises MlVToolException: if neither a configuration path nor
                --output is available
        """
        parsed = (
            ArgumentBuilder(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                            description='Convert Notebook to python script')
            .add_work_dir_argument()
            .add_conf_path_argument()
            .add_force_argument()
            .add_path_argument('-n', '--notebook', type=str, required=True,
                               help='The notebook to convert')
            .add_path_argument('-o', '--output', type=str,
                               help='The Python script output path')
            .parse(args)
        )
        self.set_log_level(parsed)
        conf = self.get_conf(parsed.working_directory, parsed.notebook,
                             parsed.conf_path)

        if not (conf.path or parsed.output):
            raise MlVToolException(
                'Parameter --output is mandatory if no conf provided')

        output = parsed.output
        if not output:
            output = get_script_output_path(parsed.notebook, conf)

        self.check_force(parsed.force, [output])
        export_to_script(parsed.notebook, output, conf)
def export_to_script(input_notebook_path: str, output_path: str,
                     conf: MlVToolConf):
    """
        Convert a notebook with a PythonExporter and write the result as a
        python script.

        Nothing is written when the export result is empty.

        :param input_notebook_path: path of the notebook to convert
        :param output_path: path of the python script to generate
        :param conf: configuration providing the keys to ignore
        :raises MlVToolException: if the export fails for any reason
    """
    logging.info(
        f'Generate Python script {output_path} from Jupyter Notebook {input_notebook_path}'
    )
    logging.debug(f'Global Configuration: {conf}')
    logging.debug(f'Template path {TEMPLATE_PATH}')

    exporter = PythonExporter(get_config(TEMPLATE_PATH))
    jinja_filters = (
        ('filter_trailing_cells', filter_trailing_cells),
        ('get_formatted_cells', get_formatted_cells),
        ('get_data_from_docstring', get_data_from_docstring),
        ('sanitize_method_name', to_method_name),
    )
    for filter_name, filter_function in jinja_filters:
        exporter.register_filter(name=filter_name, jinja_filter=filter_function)

    resources = {'ignore_keys': conf.ignore_keys}
    logging.debug(f'Template info {resources}')
    try:
        exported = exporter.from_filename(input_notebook_path,
                                          resources=resources)
        script_content, _ = exported
    except Exception as error:
        raise MlVToolException(error) from error

    if script_content:
        write_python_script(script_content, output_path)
        # Logged one level above WARNING
        logging.log(
            logging.WARNING + 1,
            f'Python script successfully generated in {abspath(output_path)}')
        return
    logging.warning('Empty notebook provided. Nothing to do.')
    def meta_checks(params: Dict[str, Optional[str]], args: List[str],
                    description: str, expected_key: str):
        """
            Validate a docstring meta entry made of a key, an optional
            related parameter name and a description.

            :param params: known parameters, mapping a name to its type name
            :param args: meta arguments: the key, optionally followed by a
                related parameter name
            :param description: meta description, must not be empty
            :param expected_key: key expected as first argument
            :raises MlVToolException: if the entry is empty, has too many
                arguments, has an unexpected key, has no description, or
                refers to an unknown parameter or to a parameter whose type
                is neither unspecified nor 'str'
        """
        if not args:
            raise MlVToolException('Cannot parse empty DocstringDVC')
        if len(args) > 2:
            raise MlVToolException(
                f'Invalid syntax: {args}. Expected :dvc-[in|out] [related_param]?: {{file_path}}'
            )
        if args[0] != expected_key:
            raise MlVToolException('Receive bad parameter {}'.format(args[0]))

        if not description:
            raise MlVToolException(f'Not path given for {args}')

        related_param = args[1] if len(args) == 2 else None
        if not related_param:
            # No related parameter: nothing more to validate
            return

        if related_param not in params:
            raise MlVToolException(
                f'Cannot find related parameter for {related_param} in {args}')

        if params[related_param] not in (None, 'str'):
            raise MlVToolException(
                f'Unsupported type {params[related_param]} for {args}. Discard.'
            )
def get_work_directory(input_path: str) -> str:
    """
        Return the git top directory found from the directory of the input file.

        :param input_path: path of an existing file
        :raises MlVToolException: if the input file does not exist
    """
    if not exists(input_path):
        raise MlVToolException(f'Input file {input_path} does not exist.')
    # dirname() is '' for a bare file name; use the current directory so
    # that git is not run with an empty working directory.
    return get_git_top_dir(dirname(input_path) or '.')
def parse_docstring(docstring_str: str) -> Docstring:
    """
        Parse a docstring using the reST style.

        :param docstring_str: docstring text
        :raises MlVToolException: if the docstring cannot be parsed
    """
    try:
        return dc_parse(docstring_str, style=Style.rest)
    except ParseError as parse_error:
        message = f'Docstring format error. {parse_error}'
        raise MlVToolException(message) from parse_error