Example #1
    def respond_file(self, path):
        abs_path = os.path.abspath(path)
        naas_type = mimetypes.guess_type(abs_path)[0]
        display(Markdown("Response Set as File, preview below: "))
        display(
            JSON(
                {"path": abs_path}, metadata={"naas_api": True, "naas_type": naas_type}
            )
        )
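The excerpt depends on names it does not import; a minimal runnable sketch, assuming the standard-library modules and IPython display helpers below (the standalone function and the file name are hypothetical):

import os
import mimetypes
from IPython.display import display, Markdown, JSON

def respond_file(path):  # standalone variant of the method above
    abs_path = os.path.abspath(path)
    # guess_type returns (type, encoding); the type may be None for unknown extensions
    naas_type = mimetypes.guess_type(abs_path)[0]
    display(Markdown("Response Set as File, preview below: "))
    display(JSON({"path": abs_path}, metadata={"naas_api": True, "naas_type": naas_type}))

respond_file("report.csv")  # hypothetical file name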
Example #2
def to_mime_and_metadata(obj) -> (dict, dict):  # noqa: C901
    if isinstance(obj, bytes):
        obj = base64.b64encode(obj).decode("utf-8")
        return {"text/html": to_html(obj)}, {}
    elif isinstance(obj, str) and obj.startswith("http"):
        if re.match(r".*\.(gif|jpg|svg|jpeg||png)$", obj, re.I):
            try:
                return Image(obj, embed=True)._repr_mimebundle_()
            except TypeError:
                pass
        return {"text/html": to_html(obj)}, {}
    elif isinstance(obj, str) and len(obj) < 1024 and os.path.exists(obj):
        if re.match(r".*\.(gif|jpg|svg|jpeg||png)$", obj, re.I):
            try:
                return Image(obj, embed=True)._repr_mimebundle_()
            except TypeError:
                pass
        return {"text/html": to_html(obj)}, {}
    elif hasattr(obj, "_repr_mimebundle_"):
        obj.embed = True
        return obj._repr_mimebundle_()
    elif hasattr(obj, "_repr_json_"):
        obj.embed = True
        return {"application/json": obj._repr_json_()}, {}
    elif hasattr(obj, "_repr_html_"):
        obj.embed = True
        return {"text/html": obj._repr_html_()}, {}
    elif hasattr(obj, "_repr_png_"):
        return {"image/png": obj._repr_png_()}, {}
    elif hasattr(obj, "_repr_jpeg_"):
        return {"image/jpeg": obj._repr_jpeg_()}, {}
    elif hasattr(obj, "_repr_svg_"):
        return {"image/svg": obj._repr_svg_()}, {}
    try:
        if isinstance(obj, str):
            return {
                "text/html": f"<pre>{to_html(obj)}</pre>".replace("\\n", "\n")
            }, {}
        else:
            data, metadata = JSON(data=obj, expanded=True)._repr_json_()
            return (
                {
                    "application/json": data,
                    "text/html": f"<pre>{to_html(obj)}</pre>"
                },
                metadata,
            )
    except (TypeError, JSONDecodeError):
        pass
    try:
        return {"text/html": to_html(obj)}, {}
    except TypeError:
        return {}, {}
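A rough usage sketch for the dispatcher above, assuming `to_mime_and_metadata` and its helpers (`to_html`, `Image`, `JSON`) are in scope; the sample inputs are invented:

# Each call returns a (mime_bundle, metadata) pair keyed by MIME type:
bundle, meta = to_mime_and_metadata({"status": "ok"})  # application/json + text/html
bundle, meta = to_mime_and_metadata("https://example.com/logo.png")  # image bundle via Image(...)
bundle, meta = to_mime_and_metadata(b"\x00\x01")  # bytes are base64-encoded into text/html
print(sorted(bundle.keys()))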
Example #3
def to_mime_and_metadata(obj) -> (dict, dict):  # noqa: C901
    if isinstance(obj, bytes):
        obj = base64.b64encode(obj).decode('utf-8')
        return {'text/html': to_html(obj)}, {}
    elif isinstance(obj, str) and obj.startswith('http'):
        if re.match(r'.*\.(gif|jpg|svg|jpeg|png)$', obj, re.I):
            try:
                return Image(obj, embed=True)._repr_mimebundle_()
            except TypeError:
                pass
        return {'text/html': to_html(obj)}, {}
    elif isinstance(obj, str) and len(obj) < 1024 and os.path.exists(obj):
        if re.match(r'.*\.(gif|jpg|svg|jpeg|png)$', obj, re.I):
            try:
                return Image(obj, embed=True)._repr_mimebundle_()
            except TypeError:
                pass
        return {'text/html': to_html(obj)}, {}
    elif hasattr(obj, '_repr_mimebundle_'):
        obj.embed = True
        return obj._repr_mimebundle_()
    elif hasattr(obj, '_repr_json_'):
        obj.embed = True
        return {'application/json': obj._repr_json_()}, {}
    elif hasattr(obj, '_repr_html_'):
        obj.embed = True
        return {'text/html': obj._repr_html_()}, {}
    elif hasattr(obj, '_repr_png_'):
        return {'image/png': obj._repr_png_()}, {}
    elif hasattr(obj, '_repr_jpeg_'):
        return {'image/jpeg': obj._repr_jpeg_()}, {}
    elif hasattr(obj, '_repr_svg_'):
        return {'image/svg+xml': obj._repr_svg_()}, {}
    try:
        data, metadata = JSON(data=obj, expanded=True)._repr_json_()
        return {
            'application/json': data,
            'text/html': f'<pre>{to_html(obj)}</pre>',
        }, metadata
    except (TypeError, JSONDecodeError):
        pass
    try:
        return {'text/html': to_html(obj)}, {}
    except TypeError:
        return {}, {}
Example #4
def kgtk(
    arg1: typing.Union[str, pandas.DataFrame],
    arg2: typing.Optional[str] = None,
    df: typing.Optional[pandas.DataFrame] = None,
    auto_display_html: typing.Optional[bool] = None,
    auto_display_json: typing.Optional[bool] = None,
    auto_display_md: typing.Optional[bool] = None,
    unquote_column_names: typing.Optional[bool] = None,
    bash_command: typing.Optional[str] = None,
    kgtk_command: typing.Optional[str] = None,
) -> typing.Optional[pandas.DataFrame]:
    """This function simplifies using KGTK commands in a Jupyter Lab environment.
    Invocation
    ==========

    kgtk("pipeline")

        Execute the command pipeline.  The results are printed, displayed, or
    returned as a Pandas DataFrame.

    kgtk(df, "pipeline")

        The `df` in the call is a Pandas DataFrame, which is converted to KGTK
    format and passed to the pipeline as standard input. The results are
    printed, displayed, or returned as a Pandas DataFrame.

    Optional Parameters
    ======== ==========

    df=DF (default None)
    
        This is an alternate method for specifying an input DataFrame.

    auto_display_html=True/False (default True)

        This parameter controls the processing of HTML output.  See below.

    auto_display_json=True/False (default True)

        This parameter controls the processing of JSON output.  See below.

    auto_display_md=True/False (default False)

        This parameter controls the processing of MarkDown output.  See below.

    unquote_column_names=True/False (default True)

        Convert string column names to symbols.

    bash_command=CMD (default 'bash')

        This parameter specifies the name of the shell interpreter.  If the
    envar KGTK_BASH_COMMAND is present, it will supply the default value for
    the name of the shell interpreter.

    kgtk_command=CMD (default 'kgtk')

        This parameter specifies the kgtk shell command.  If the envar
    KGTK_KGTK_COMMAND is present, it will supply the default value for the
    name of the `kgtk` command.

        One use for this feature is to redefine the `kgtk` command to include
    `time` as a prefix, and/or to include common options.

    Standard Output Processing
    ======== ====== ==========

    If the standard output of the pipeline is in HTML format (`--output-format HTML` or
    `kgtk("... /html")`), identified by starting with `<!DOCTYPE html>`, the
    output will be displayed with `display(HTML(output))` by default.
    However, if `kgtk(... auto_display_html=False)` is passed or if the envar
    `KGTK_AUTO_DISPLAY_HTML` is set to `false`, then the output will be printed.

    If the standard output of the pipeline is in JSON format (`--output-format JSON`),
    identified as starting with `[` or `{`, the output will be displayed with
    `display(JSON(output))` by default.  However, if
    `kgtk(... auto_display_json=False)` is passed or if the envar
    `KGTK_AUTO_DISPLAY_JSON` is set to `false`, then the output will be printed.

    If the standard output of the pipeline is in MarkDown format (typically
    produced by ending the pipeline in `... / md` or `... / table`), identified
    as starting with `|`, the output will be printed by default.  However, if
    `auto_display_md=True` is passed in the `kgtk(...)` call, or if the envar
    `KGTK_AUTO_DISPLAY_MD` is set to `true`, then the MarkDown will be
    displayed using `display(Markdown(output))`.

    If the standard output of the pipeline begins with "usage:", then it is
    treated as output from `--help` and printed.

    If the standard output starts with anything other than the cases listed
    above, then the output is assumed to be in KGTK format.  It is converted
    to a Pandas DataFrame, which is returned to the caller.

    Error Output Processing
    ===== ====== ==========

    If standard output was printed or displayed, then any error output will be printed
    immediately after it.

    If standard output was converted to a DataFrame and returned, and
    subsequently displayed by the iPython shell, then any error output will be
    printed before the DataFrame is displayed.

    Environment Variables
    =========== =========

    This module directly uses the following environment variables:

    KGTK_AUTO_DISPLAY_HTML
    KGTK_AUTO_DISPLAY_JSON
    KGTK_AUTO_DISPLAY_MD
    KGTK_UNQUOTE_COLUMN_NAMES
    KGTK_BASH_COMMAND
    KGTK_KGTK_COMMAND

    """

    # Important prefixes to look for in standard output:
    MD_SIGIL: str = "|"
    JSON_SIGIL: str = "["
    JSONL_MAP_SIGIL: str = "{"
    HTML_SIGIL: str = "<!DOCTYPE html>"
    USAGE_SIGIL: str = "usage:"  # Output from `kgtk --help` or `kgtk command --help`
    GRAPH_CACHE_SIGIL: str = "Graph Cache"  # Output from `kgtk query --show-cache`

    # Set the defaults:
    if auto_display_html is None:
        auto_display_html = os.getenv("KGTK_AUTO_DISPLAY_HTML",
                                      "true").lower() in ["true", "yes", "y"]
    if auto_display_json is None:
        auto_display_json = os.getenv("KGTK_AUTO_DISPLAY_JSON",
                                      "true").lower() in ["true", "yes", "y"]
    if auto_display_md is None:
        auto_display_md = os.getenv("KGTK_AUTO_DISPLAY_MD",
                                    "false").lower() in ["true", "yes", "y"]
    if unquote_column_names is None:
        unquote_column_names = os.getenv(
            "KGTK_UNQUOTE_COLUMN_NAMES",
            "true").lower() in ["true", "yes", "y"]

    # Why not os.getenv("KGTK_BASH_COMMAND", "bash")? Splitting it up makes
    # mypy happier.
    if bash_command is None:
        bash_command = os.getenv("KGTK_BASH_COMMAND")
    if bash_command is None:
        bash_command = "bash"

    if kgtk_command is None:
        kgtk_command = os.getenv("KGTK_KGTK_COMMAND")
    if kgtk_command is None:
        kgtk_command = "kgtk"

    # Figure out the input DataFrame and pipeline arguments:
    in_df: typing.Optional[pandas.DataFrame] = None
    pipeline: str
    if isinstance(arg1, str):
        if arg2 is not None:
            raise ValueError(
                "kgtk(arg1, arg2): arg2 is not allowed when arg1 is a string")
        pipeline = arg1
    elif isinstance(arg1, pandas.DataFrame):
        if arg2 is None:
            raise ValueError(
                "kgtk(arg1, arg2): arg2 is required when arg1 is a DataFrame")
        in_df = arg1
        pipeline = arg2
    else:
        raise TypeError(
            "kgtk(arg1, ...): arg1 must be a str or a pandas.DataFrame")

    if df is not None:
        if in_df is not None:
            raise ValueError(
                "kgtk(): df= is not allowed when arg1 is a DataFrame")
        in_df = df

    if len(pipeline) == 0:
        raise ValueError("kgtk(...): the pipeline is empty")
    pipeline = kgtk_command + " " + ' '.join(pipeline.splitlines())

    # If we were supplied an input DataFrame, convert it to KGTK format.
    #
    # TODO: The conversion should optionally escape internal `|` characters as `\|`.
    in_tsv: typing.Optional[str] = None
    if in_df is not None:
        in_tsv = in_df.to_csv(
            sep='\t',
            index=False,
            quoting=csv.QUOTE_NONNUMERIC,
            quotechar='"',
            doublequote=False,
            escapechar='\\',
        )
        if unquote_column_names:
            # Pandas will have treated the column names as strings and quoted
            # them.  By convention, KGTK column names are symbols.  So, we will
            # remove double quotes from the outside of each column name.
            #
            # TODO: Handle the troublesome case of a double quote inside a column
            # name.
            header, body = in_tsv.split('\n', 1)
            column_names = header.split('\t')
            column_names = [
                x[1:-1] if x.startswith('"') else x for x in column_names
            ]
            header = "\t".join(column_names)
            in_tsv = header + "\n" + body

    # Execute the KGTK command pipeline:
    outbuf: StringIO = StringIO()
    errbuf: StringIO = StringIO()

    try:
        sh_bash = sh.Command(bash_command)
        sh_bash("-c", pipeline, _in=in_tsv, _out=outbuf, _err=errbuf)

    except sh.ErrorReturnCode as e:
        # The pipeline returned an error.  stderr should have an error message.
        errmsg: str = errbuf.getvalue()
        if len(errmsg) > 0:
            print(errbuf.getvalue())
        else:
            print(str(e))
        return None

    output: str = outbuf.getvalue()

    # Decide what to do based on the start of the output:
    result: typing.Optional[pandas.DataFrame] = None
    if len(output) == 0:
        pass  # No standard output

    elif output.startswith(MD_SIGIL):
        # Process Markdown output.
        if auto_display_md:
            display(Markdown(output))
        else:
            print(output)

    elif output.startswith(JSON_SIGIL) or output.startswith(JSONL_MAP_SIGIL):
        # Process JSON output.
        if auto_display_json:
            display(JSON(json.loads(output)))
        else:
            print(output)

    elif output[:len(HTML_SIGIL)].casefold() == HTML_SIGIL.casefold():
        # Process HTML output.
        if auto_display_html:
            display(HTML(output))
        else:
            print(output)

    elif output[:len(USAGE_SIGIL)].casefold() == USAGE_SIGIL.casefold():
        # Process --help output.
        print(output)

    elif (output[:len(GRAPH_CACHE_SIGIL)].casefold()
          == GRAPH_CACHE_SIGIL.casefold()):
        # Process `kgtk query --show-cache` output.
        print(output)

    else:
        # Assume that anything else is KGTK formatted output.  Convert it to a
        # pandas DataFrame and return it.
        #
        # TODO: Test this conversion with all KGTK datatypes.  Language-qualified
        # strings are problematic.  Check what happens to quantities, date/times,
        # and locations.
        #
        # TODO: Remove the escape character from internal `|` characters?
        # If we do that, should we detect KGTK lists and complain?
        # `\|` -> `|`
        outbuf.seek(0)
        result = pandas.read_csv(
            outbuf,
            sep='\t',
            quotechar='"',
            doublequote=False,
            escapechar='\\',
        )

    outbuf.close()

    # Any error messages? If so, print them at the end.
    errout: str = errbuf.getvalue()
    if len(errout) > 0:
        print(errout)

    return result
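Two invocation sketches matching the docstring's calling conventions; the input file name is a placeholder:

import pandas

# Pipeline only: KGTK-formatted standard output comes back as a DataFrame.
df = kgtk("cat -i input.tsv")

# DataFrame in: it is converted to KGTK TSV and piped to the command as stdin;
# `/ md` produces Markdown, displayed here because auto_display_md=True.
edges = pandas.DataFrame({"node1": ["Q1"], "label": ["P31"], "node2": ["Q5"]})
kgtk(edges, "cat / md", auto_display_md=True)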
Example #5
    def job_upgrade(self, py_path):
        parser = argparse.ArgumentParser(prefix_chars=prefix)
        parser.add_argument('--platform',
                            '-pm',
                            type=Platform,
                            help='Working platform')
        parser.add_argument('--name',
                            '-n',
                            type=str,
                            help='Name of script file',
                            default='default.py',
                            nargs='+',
                            action=JoinAction)
        parser.add_argument('--profile',
                            '-p',
                            type=str,
                            help='Name of profile',
                            default='DemoProfile',
                            nargs='+',
                            action=JoinAction)
        parser.add_argument('--old_job_id',
                            type=str,
                            help='ID of old version job',
                            default=None,
                            nargs='+',
                            action=JoinAction)
        parser.add_argument('--validator',
                            '-v',
                            help='name of class Validator',
                            type=str,
                            nargs='+',
                            action=JoinAction)
        parser.add_argument('--validator_path',
                            '-vp',
                            help='path to file with class Validator',
                            type=str,
                            nargs='+',
                            action=JoinAction)
        parser.add_argument('--output_path',
                            '-o',
                            type=str,
                            help='Output GCS path',
                            default='',
                            nargs='+',
                            action=JoinAction)
        print("Parameters string = <<<{}>>>".format(py_path))

        args = parser.parse_known_args(py_path.split())
        prf_name = args[0].profile
        prf = Profile.get(prf_name)
        if prf is None:
            raise RuntimeError(
                'Provided parameters profile {} does not exist.'.format(
                    prf_name))

        session, job, job_name, output_path = self.build_data_job(args, prf)

        # noinspection PyTypeChecker
        display(
            HTML(
                '<a href="{url}/{job_name}?project={project}&region={region}">{job_name}</a>'
                .format(url=DATAPROC_JOBS_URL,
                        job_name=job_name,
                        project=prf.project,
                        region=prf.region)))

        validator_module = run_path(args[0].validator_path)

        executor = JobUpgradeExecutor(job, session, args[0].old_job_id)
        res = executor.submit_upgrade_job(validator=args[0].validator,
                                          validator_path=validator_module,
                                          run_async=prf.job_async)
        job_tracker[job_name] = res
        # noinspection PyTypeChecker
        display(JSON(res))

        job_reference = [
            '#Use job_{job_name} instance to browse job properties.'.format(
                job_name=job_name),
            "job_{job_name} = job_tracker['{job_name}']".format(
                job_name=job_name)
        ]
        get_ipython().set_next_input('\n'.join(job_reference))
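The magic receives a flat parameter string and parses it with `argparse`; a hypothetical invocation, assuming `magics` is an instance of the class defining `job_upgrade` (all values are invented):

magics.job_upgrade(
    "--name default.py --profile DemoProfile --old_job_id 12345 "
    "--validator MyValidator --validator_path validators/my_validator.py "
    "--output_path gs://my-bucket/output"
)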
Example #6
    def py_deploy(self, py_path):
        parser = argparse.ArgumentParser(prefix_chars=prefix)
        parser.add_argument('--model',
                            '-n',
                            type=str,
                            help='Name of model',
                            nargs='+',
                            action=JoinAction)
        parser.add_argument('--platform',
                            '-pm',
                            type=Platform,
                            help='Working platform')
        parser.add_argument('--profile',
                            '-p',
                            type=str,
                            help='Name of profile',
                            default='AIDemoProfile',
                            nargs='+',
                            action=JoinAction)

        args = parser.parse_known_args(py_path.split())
        prf_name = args[0].profile
        prf = Profile.get(prf_name)
        if prf is None:
            raise RuntimeError(
                'Provided parameters profile {} does not exist.'.format(
                    prf_name))
        cred = prf.use_cloud_engine_credentials

        if args[0].platform == Platform.GCP:
            path_of_model = prf.path_to_saved_model
            args_dct = prf.arguments
            args_dct['pythonVersion'] = prf.python_version
            args_dct['runtimeVersion'] = prf.runtime_version
            args_dct['deploymentUri'] = f"{path_of_model}"

            deployment_artifacts = []
            for a in prf.artifacts:
                if a.startswith("gs://"):
                    deployment_artifact = Artifact(file_name=a,
                                                   path=path_of_model)
                else:
                    fname = os.path.basename(a)
                    deployment_artifact = Artifact(file_name=fname,
                                                   path=path_of_model)
                deployment_artifacts.append(deployment_artifact)

            m_builder = ModelBuilder()
            m_builder = m_builder.name(args[0].model).files_root(prf.root_path)
            if prf.custom_code is not None:
                m_builder = m_builder.custom_predictor_path(prf.custom_code)

            model = (m_builder.artifacts(deployment_artifacts).is_tuning(
                False).build())

            ai_job_builder = AIJobBuilder()
            ai_job_builder = ai_job_builder.model(model).package_dst(
                prf.package_dst)
            if prf.custom_code is not None:
                ai_job_builder = ai_job_builder.package_src(prf.root_path)
            ai_job = ai_job_builder.deploy_input(args_dct).build()

        job_name = '{}_{}'.format(prf.job_prefix,
                                  int(datetime.now().timestamp()))
        project = prf.project if hasattr(prf, "project") else prf.job_prefix
        ai_region = prf.ai_region if hasattr(prf, "ai_region") else prf.region
        session = SessionFactory(platform=args[0].platform).build_session(
            job_bucket=prf.bucket,
            job_region=prf.region,
            cluster=prf.cluster,
            job_project_id=project,
            ml_region=ai_region,
            use_cloud_engine_credentials=cred)
        if args[0].platform == Platform.GCP:
            executor = AIPlatformJobExecutor(session,
                                             ai_job,
                                             wait_delay=10,
                                             wait_tries=1000)
            if prf.is_new_model:
                response = executor.submit_deploy_model_job(
                    prf.version_name, create_new_model=True)
            else:
                response = executor.submit_deploy_model_job(prf.version_name)
            job_tracker[job_name] = executor
            # noinspection PyTypeChecker
            display(
                HTML(
                    '<a href="{url}/{path_of_model}?project={project}">Deploy model path {job_name}</a>'
                    .format(url=STORAGE_BROWSER_URL,
                            path_of_model=path_of_model.split('gs://')[1],
                            job_name=job_name,
                            project=prf.project)))
        else:
            script_name = args[0].model
            # TODO: args={}
            executor = SageMakerExecutor(session,
                                         prf,
                                         mode='deploy',
                                         py_script_name=os.path.join(
                                             prf.root_path, script_name),
                                         args={})
            predictor, response = executor.submit_deploy_model_job()
            job_tracker[job_name] = predictor
        # noinspection PyTypeChecker
        display(JSON(response))
        job_reference = [
            '#Use job_{job_name} instance to browse job properties.'.format(
                job_name=job_name),
            "#job_tracker['{job_name}']".format(job_name=job_name)
        ]
        get_ipython().set_next_input('\n'.join(job_reference))
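A hypothetical `py_deploy` call in the same style; it assumes the `Platform` enum accepts the string value "GCP":

magics.py_deploy("--model my_model --platform GCP --profile AIDemoProfile")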
Example #7
    def py_train(self, py_path):
        parser = argparse.ArgumentParser(prefix_chars=prefix)
        parser.add_argument('--platform',
                            '-pm',
                            type=Platform,
                            help='Working platform')
        parser.add_argument('--name',
                            '-n',
                            type=str,
                            help='Train script module name',
                            default='./',
                            nargs='+',
                            action=JoinAction)
        parser.add_argument('--profile',
                            '-p',
                            type=str,
                            help='Name of profile',
                            default='AIDemoProfile',
                            nargs='+',
                            action=JoinAction)
        parser.add_argument('--output_path',
                            '-o',
                            type=str,
                            help='Output GCS path',
                            default='',
                            nargs='+',
                            action=JoinAction)
        args = parser.parse_known_args(py_path.split())
        script_name = args[0].name
        prf_name = args[0].profile
        prf = Profile.get(prf_name)
        if prf is None:
            raise RuntimeError(
                'Provided parameters profile {} does not exist.'.format(
                    prf_name))
        package_src = prf.root_path

        args_dct = ExecMagic.convert(args[1])
        cred = prf.use_cloud_engine_credentials
        project = prf.project if hasattr(prf, "project") else prf.job_prefix
        ai_region = prf.ai_region if hasattr(prf, "ai_region") else prf.region
        session = SessionFactory(platform=args[0].platform).build_session(
            job_bucket=prf.bucket,
            job_region=prf.region,
            cluster=prf.cluster,
            job_project_id=project,
            ml_region=ai_region,
            use_cloud_engine_credentials=cred)
        job_name = '{}_{}'.format(prf.job_prefix,
                                  int(datetime.now().timestamp()))
        if args[0].platform == Platform.GCP:
            output_path = '{}/{}'.format(args[0].output_path, job_name)

            args_dct = {**prf.arguments, **args_dct}
            args_dct['--output_path'] = output_path

            arguments = Arguments()
            arguments.set_args(**args_dct)
            training_input = {
                "region": prf.ai_region,
                "scaleTier": prf.scale_tier,
                "jobDir": output_path,
                "pythonModule": '{}.{}'.format(prf.package_name,
                                               script_name.split('.py')[0]),
                "runtimeVersion": prf.runtime_version,
                "pythonVersion": prf.python_version,
            }
            m_builder = ModelBuilder()
            model = m_builder.name(job_name).train_arguments(arguments).build()
            ai_job_builder = AIJobBuilder()
            ai_job = (ai_job_builder.model(model).package_src(
                package_src).package_dst('{}/{}'.format(
                    prf.package_dst,
                    job_name)).train_input(training_input).name(
                        job_name).job_dir(output_path).build())

            # noinspection PyTypeChecker
            display(
                HTML(
                    '<a href="{url}/{job_name}/charts/cpu?project={project}">{job_name}</a>'
                    .format(url=AI_JOBS_URL,
                            job_name=job_name,
                            project=prf.project)))

            executor = AIPlatformJobExecutor(session, ai_job, 10, 1000)
        else:
            for k in args_dct.copy():
                args_dct[re.sub("--", '', k)] = args_dct[k]
                args_dct.pop(k)
            executor = SageMakerExecutor(session,
                                         prf,
                                         mode='train',
                                         py_script_name=os.path.join(
                                             package_src, script_name),
                                         args=args_dct)

        response = executor.submit_train_job()

        if args[0].platform == Platform.GCP:
            job_tracker[job_name] = executor
            # noinspection PyTypeChecker
            display(
                HTML(
                    '<a href="{url}/{output_path}?project={project}">Output Data {job_name}</a>'
                    .format(url=STORAGE_BROWSER_URL,
                            output_path=output_path.split('gs://')[1],
                            job_name=job_name,
                            project=prf.project)))
        else:
            job_tracker[job_name] = executor.executor
            display(
                HTML('<a href="{url}">{job_name}</a>'.format(
                    url=response['model_data'],
                    job_name=response['model_data'])))
        display(JSON(response))
        job_reference = [
            '#Use job_{job_name} instance to browse job properties.'.format(
                job_name=job_name),
            "#job_{job_name} = job_tracker['{job_name}']".format(
                job_name=job_name)
        ]
        get_ipython().set_next_input('\n'.join(job_reference))
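A hypothetical `py_train` call; flags the parser does not declare (here `--epochs`) land in `args[1]` and are forwarded to the training job through `ExecMagic.convert`:

magics.py_train(
    "--name train.py --profile AIDemoProfile "
    "--output_path gs://my-bucket/jobs --epochs 10"
)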
Example #8
    def py_data(self, py_path):
        parser = argparse.ArgumentParser(prefix_chars=prefix)
        parser.add_argument('--platform',
                            '-pm',
                            type=Platform,
                            help='Working platform')
        parser.add_argument('--name',
                            '-n',
                            type=str,
                            help='Name of script file',
                            default='default.py',
                            nargs='+',
                            action=JoinAction)
        parser.add_argument('--profile',
                            '-p',
                            type=str,
                            help='Name of profile',
                            default='DemoProfile',
                            nargs='+',
                            action=JoinAction)
        parser.add_argument('--output_path',
                            '-o',
                            type=str,
                            help='Output GCS path',
                            default='',
                            nargs='+',
                            action=JoinAction)
        print("Parameters string = <<<{}>>>".format(py_path))

        args = parser.parse_known_args(py_path.split())
        prf_name = args[0].profile
        prf = Profile.get(prf_name)
        if prf is None:
            raise RuntimeError(
                'Provided parameters profile {} does not exist.'.format(
                    prf_name))

        session, job, job_name, output_path = self.build_data_job(args, prf)

        if args[0].platform == Platform.GCP:
            # noinspection PyTypeChecker
            display(
                HTML(
                    '<a href="{url}/{job_name}?project={project}&region={region}">{job_name}</a>'
                    .format(url=DATAPROC_JOBS_URL,
                            job_name=job_name,
                            project=prf.project,
                            region=prf.region)))
            executor = DataprocExecutor(job, session)
            res = executor.submit_job(run_async=prf.job_async)
        else:
            executor = EmrExecutor(job, session)
            res = executor.submit_job(run_async=prf.job_async)

        job_tracker[job_name] = res
        # noinspection PyTypeChecker
        display(
            HTML(
                '<a href="{url}/{output_path}?{region}">Output Data {job_name}</a>'
                .format(
                    url=STORAGE_BROWSER_URL
                    if args[0].platform == Platform.GCP else S3_BROWSER_URL,
                    output_path=output_path.split('gs://')[1]
                    if args[0].platform == Platform.GCP else
                    f"{prf.bucket}/emr/{res['placement']['cluster_id']}/steps/{res['placement']['step_id']}/",
                    job_name=job_name,
                    region=f'project={prf.project}' if args[0].platform
                    == Platform.GCP else f'region={prf.region}')))

        job_reference = [
            '#Use job_{job_name} instance to browse job properties.'.format(
                job_name=job_name),
            "#job_{job_name} = job_tracker['{job_name}']".format(
                job_name=job_name)
        ]
        display(JSON(res))
        get_ipython().set_next_input('\n'.join(job_reference))
Example #9
    def respond_json(self, data):
        display(Markdown("Response Set as JSON, preview below: "))
        display(JSON(data, metadata={"naas_api": True}))
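A standalone sketch with the assumed imports:

from IPython.display import display, Markdown, JSON

def respond_json(data):  # standalone variant of the method above
    display(Markdown("Response Set as JSON, preview below: "))
    display(JSON(data, metadata={"naas_api": True}))

respond_json({"message": "hello"})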
Example #10
    def respond_json(self, data):
        display(JSON(data, metadata={"naas_api": True}))