Code example #1
class LSFLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for LSF."""

    submit_command = List(['bsub'], config=True,
                          help="The PBS submit command ['bsub']")
    delete_command = List(['bkill'], config=True,
                          help="The PBS delete command ['bkill']")
    job_id_regexp = CRegExp(r'\d+', config=True,
                            help="Regular expresion for identifying the job ID [r'\d+']")

    batch_file = Unicode('')
    job_array_regexp = CRegExp(r'#BSUB[ \t]-J+\w+\[\d+-\d+\]')
    job_array_template = Unicode('#BSUB -J ipengine[1-{n}]')
    queue_regexp = CRegExp(r'#BSUB[ \t]+-q[ \t]+\w+')
    queue_template = Unicode('#BSUB -q {queue}')

    def start(self, n):
        """Start n copies of the process using LSF batch system.
        This can't inherit from the base class because bsub expects
        to be piped a shell script in order to honor the #BSUB directives:
        bsub < script
        """
        # Here we save profile_dir in the context so it
        # can be used in the batch script template as {profile_dir}
        self.write_batch_script(n)
        #output = check_output(self.args, env=os.environ)
        piped_cmd = self.args[0] + '<"' + self.args[1] + '"'
        self.log.debug("Starting %s: %s", self.__class__.__name__, piped_cmd)
        p = Popen(piped_cmd, shell=True, env=os.environ, stdout=PIPE)
        output, err = p.communicate()
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id
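
For context, `bsub` typically prints a line like ``Job <1234> is submitted to queue <normal>.``; a minimal sketch of how the ``job_id_regexp`` above would extract the id from it (the sample output line is an assumption based on common LSF setups, not taken from this code):

import re

job_id_regexp = re.compile(r'\d+')                     # same pattern as above
output = "Job <1234> is submitted to queue <normal>."  # assumed bsub output
match = job_id_regexp.search(output)
print(match.group())  # -> '1234'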
Code example #2
class AutocallChecker(PrefilterChecker):

    priority = Integer(1000, config=True)

    function_name_regexp = CRegExp(
        re_fun_name,
        config=True,
        help="RegExp to identify potential function names.")
    exclude_regexp = CRegExp(
        re_exclude_auto,
        config=True,
        help="RegExp to exclude strings with this start from autocalling.")

    def check(self, line_info):
        "Check if the initial word/function is callable and autocall is on."
        if not self.shell.autocall:
            return None

        oinfo = line_info.ofind(self.shell)  # This can mutate state via getattr
        if not oinfo['found']:
            return None

        if callable(oinfo['obj']) \
               and (not self.exclude_regexp.match(line_info.the_rest)) \
               and self.function_name_regexp.match(line_info.ifun):
            return self.prefilter_manager.get_handler_by_name('auto')
        else:
            return None
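
``re_fun_name`` and ``re_exclude_auto`` above are module-level patterns defined elsewhere in IPython's prefilter module. A minimal sketch of the two-regexp gate in ``check``, using illustrative patterns (these are assumptions, not IPython's actual values):

import re

function_name_regexp = re.compile(r'[a-zA-Z_][a-zA-Z0-9_.]*$')  # hypothetical
exclude_regexp = re.compile(r'^[!=<>,(]')                        # hypothetical

def would_autocall(ifun, the_rest):
    # autocall fires only if the rest of the line is not excluded and the
    # leading word looks like a function name
    return (not exclude_regexp.match(the_rest)) and \
        bool(function_name_regexp.match(ifun))

print(would_autocall('range', '10'))   # True  -> 'range 10' becomes range(10)
print(would_autocall('range', '=10'))  # False -> looks like an assignment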
Code example #3
class SLURMLauncher(launcher.BatchSystemLauncher):
    """A BatchSystemLauncher subclass for SLURM
    """
    submit_command = List(['sbatch'],
                          config=True,
                          help="The SLURM submit command ['sbatch']")
    # Send SIGKILL instead of SIGTERM; otherwise the job ends up "CANCELLED",
    # not "FINISHED"
    delete_command = List(['scancel', '--signal=KILL'],
                          config=True,
                          help="The SLURM delete command ['scancel']")
    job_id_regexp = CRegExp(
        r'\d+',
        config=True,
        help="A regular expression used to get the job id "
             "from the output of 'sbatch'")

    batch_file = Unicode(
        u'',
        config=True,
        help="The full path to the instantiated batch script.")

    queue_regexp = CRegExp(r'#SBATCH\W+-p\W+\w')
    queue_template = Unicode('#SBATCH -p {queue}')
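
A minimal sketch of the delete invocation that the base class's ``stop`` assembles from the traits above (the job id is illustrative):

delete_command = ['scancel', '--signal=KILL']
job_id = '4242'  # illustrative
print(' '.join(delete_command + [job_id]))  # -> scancel --signal=KILL 4242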
Code example #4
class PBSLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for PBS."""

    submit_command = List(['qsub'], config=True,
        help="The PBS submit command ['qsub']")
    delete_command = List(['qdel'], config=True,
        help="The PBS delete command ['qsub']")
    job_id_regexp = CRegExp(r'\d+', config=True,
        help="Regular expresion for identifying the job ID [r'\d+']")

    batch_file = Unicode('')
    job_array_regexp = CRegExp(r'#PBS\W+-t\W+[\w\d\-\$]+')
    job_array_template = Unicode('#PBS -t 1-{n}')
    queue_regexp = CRegExp(r'#PBS\W+-q\W+\$?\w+')
    queue_template = Unicode('#PBS -q {queue}')
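
A minimal sketch of how the two templates above render; plain ``str.format`` stands in here for the launcher's ``EvalFormatter``:

print('#PBS -t 1-{n}'.format(n=8))             # -> #PBS -t 1-8
print('#PBS -q {queue}'.format(queue='long'))  # -> #PBS -q long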
Code example #5
class BatchSystemLauncher(BaseLauncher):
    """Launch an external process using a batch system.

    This class is designed to work with UNIX batch systems like PBS, LSF,
    GridEngine, etc.  The overall model is that there are different commands
    like qsub, qdel, etc. that handle the starting and stopping of the process.

    This class also has the notion of a batch script. The ``batch_template``
    attribute can be set to a string that is a template for the batch script.
    This template is instantiated using string formatting. Thus the template can
    use {n} for the number of instances. Subclasses can add additional variables
    to the template dict.
    """

    # Subclasses must fill these in.  See PBSEngineSet
    submit_command = List([''], config=True,
        help="The name of the command line program used to submit jobs.")
    delete_command = List([''], config=True,
        help="The name of the command line program used to delete jobs.")
    job_id_regexp = CRegExp('', config=True,
        help="""A regular expression used to get the job id from the output of the
        submit_command.""")
    batch_template = Unicode('', config=True,
        help="The string that is the batch script template itself.")
    batch_template_file = Unicode('', config=True,
        help="The file that contains the batch template.")
    batch_file_name = Unicode('batch_script', config=True,
        help="The filename of the instantiated batch script.")
    queue = Unicode('', config=True,
        help="The PBS Queue.")

    def _queue_changed(self, name, old, new):
        self.context[name] = new

    n = Integer(1)
    _n_changed = _queue_changed

    # not configurable, override in subclasses
    # PBS Job Array regex
    job_array_regexp = CRegExp('')
    job_array_template = Unicode('')
    # PBS Queue regex
    queue_regexp = CRegExp('')
    queue_template = Unicode('')
    # The default batch template, override in subclasses
    default_template = Unicode('')
    # The full path to the instantiated batch script.
    batch_file = Unicode('')
    # the format dict used with batch_template:
    context = Dict()
    def _context_default(self):
        """load the default context with the default values for the basic keys

        because the _trait_changed methods only load the context if they
        are set to something other than the default value.
        """
        return dict(n=1, queue='', profile_dir='', cluster_id='')
    
    # the Formatter instance for rendering the templates:
    formatter = Instance(EvalFormatter, (), {})


    def find_args(self):
        return self.submit_command + [self.batch_file]

    def __init__(self, work_dir='.', config=None, **kwargs):
        super(BatchSystemLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )
        self.batch_file = os.path.join(self.work_dir, self.batch_file_name)

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id."""
        m = self.job_id_regexp.search(output)
        if m is not None:
            job_id = m.group()
        else:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        self.job_id = job_id
        self.log.info('Job submitted with job id: %r', job_id)
        return job_id

    def write_batch_script(self, n):
        """Instantiate and write the batch script to the work_dir."""
        self.n = n
        # first priority is batch_template if set
        if self.batch_template_file and not self.batch_template:
            # second priority is batch_template_file
            with open(self.batch_template_file) as f:
                self.batch_template = f.read()
        if not self.batch_template:
            # third (last) priority is default_template
            self.batch_template = self.default_template

            # add job array or queue lines to the default template;
            # note that this happens *only* when the user did not
            # specify a template.
            if not self.job_array_regexp.search(self.batch_template):
                self.log.debug("adding job array settings to batch script")
                firstline, rest = self.batch_template.split('\n', 1)
                self.batch_template = '\n'.join([firstline, self.job_array_template, rest])

            if self.queue and not self.queue_regexp.search(self.batch_template):
                self.log.debug("adding PBS queue settings to batch script")
                firstline, rest = self.batch_template.split('\n', 1)
                self.batch_template = '\n'.join([firstline, self.queue_template, rest])

        script_as_string = self.formatter.format(self.batch_template, **self.context)
        self.log.debug('Writing batch script: %s', self.batch_file)

        with open(self.batch_file, 'w') as f:
            f.write(script_as_string)
        os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)

    def start(self, n):
        """Start n copies of the process using a batch system."""
        self.log.debug("Starting %s: %r", self.__class__.__name__, self.args)
        # Here we save profile_dir in the context so it
        # can be used in the batch script template as {profile_dir}
        self.write_batch_script(n)
        output = check_output(self.args, env=os.environ)

        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id

    def stop(self):
        output = check_output(self.delete_command + [self.job_id], env=os.environ)
        self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd
        return output
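
A minimal usage sketch of the template pipeline above (the work dir, template body, and queue name are assumptions):

launcher = PBSLauncher(work_dir='/tmp/cluster')  # assumed work dir
launcher.batch_template = '\n'.join([
    '#!/bin/sh',
    '#PBS -q {queue}',
    'mpiexec -n {n} ipengine --profile-dir={profile_dir}',
])
launcher.queue = 'batch'        # mirrored into launcher.context by _queue_changed
launcher.write_batch_script(4)  # renders {n}=4 and writes ./batch_script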
Code example #6
class WindowsHPCLauncher(BaseLauncher):

    job_id_regexp = CRegExp(r'\d+', config=True,
        help="""A regular expression used to get the job id from the output of the
        submit_command. """
        )
    job_file_name = Unicode('ipython_job.xml', config=True,
        help="The filename of the instantiated job script.")
    # The full path to the instantiated job script. This gets made dynamically
    # by combining the work_dir with the job_file_name.
    job_file = Unicode('')
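    # NOTE: the job_file property defined below rebinds this name when the
    # class body executes, so this Unicode trait is effectively unused.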
    scheduler = Unicode('', config=True,
        help="The hostname of the scheduler to submit the job to.")
    job_cmd = Unicode(find_job_cmd(), config=True,
        help="The command for submitting jobs.")

    def __init__(self, work_dir='.', config=None, **kwargs):
        super(WindowsHPCLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )

    @property
    def job_file(self):
        return os.path.join(self.work_dir, self.job_file_name)

    def write_job_file(self, n):
        raise NotImplementedError("Implement write_job_file in a subclass.")

    def find_args(self):
        return ['job.exe']

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id."""
        m = self.job_id_regexp.search(output)
        if m is not None:
            job_id = m.group()
        else:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        self.job_id = job_id
        self.log.info('Job started with id: %r', job_id)
        return job_id

    def start(self, n):
        """Start n copies of the process using the Win HPC job scheduler."""
        self.write_job_file(n)
        args = [
            'submit',
            '/jobfile:%s' % self.job_file,
            '/scheduler:%s' % self.scheduler
        ]
        self.log.debug("Starting Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))

        output = check_output([self.job_cmd]+args,
            env=os.environ,
            cwd=self.work_dir,
            stderr=STDOUT
        )
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id

    def stop(self):
        args = [
            'cancel',
            self.job_id,
            '/scheduler:%s' % self.scheduler
        ]
        self.log.info("Stopping Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
        try:
            output = check_output([self.job_cmd]+args,
                env=os.environ,
                cwd=self.work_dir,
                stderr=STDOUT
            )
        except Exception:
            output = 'The job already appears to be stopped: %r' % self.job_id
        self.notify_stop(dict(job_id=self.job_id, output=output))  # Pass the output of the kill cmd
        return output
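
A minimal sketch of the submit command line that ``start`` assembles (the scheduler host is illustrative):

job_cmd = 'job'  # the HPC Pack CLI located by find_job_cmd()
args = ['submit', '/jobfile:ipython_job.xml', '/scheduler:headnode']
print(job_cmd + ' ' + ' '.join(args))
# -> job submit /jobfile:ipython_job.xml /scheduler:headnode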
Code example #7
class SGELauncher(PBSLauncher):
    """Sun GridEngine is a PBS clone with slightly different syntax"""
    job_array_regexp = CRegExp(r'#\$\W+\-t')
    job_array_template = Unicode('#$ -t 1-{n}')
    queue_regexp = CRegExp(r'#\$\W+-q\W+\$?\w+')
    queue_template = Unicode('#$ -q {queue}')
Code example #8
File: test_traitlets.py Project: wyorgb/ipython
class CRegExpTrait(HasTraits):

    value = CRegExp(r'')
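
A minimal sketch of what this trait does when assigned: ``CRegExp`` casts strings through ``re.compile``, so ``value`` always holds a compiled pattern (behavior as in IPython's traitlets):

t = CRegExpTrait()
t.value = r'a\d+'                 # a plain string is coerced to a compiled regex
print(t.value.search('row a42'))  # -> a match object; .value has search()/match()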
Code example #9
class Knitpy(LoggingConfigurable):
    """Engine used to convert from python markdown (``*.pymd``) to html/latex/..."""
    keep_md = Bool(False,
                   config=True,
                   help="""Whether to keep the temporary md files""")

    log_to_file = Bool(False,
                       config=True,
                       help="""Whether to send the log to a file""")

    extra_document_configs = List(
        default_value=[],
        config=True,
        help="Additional configurations for FinalOutputDocuments")

    default_export_format = CaselessStrEnum(
        VALID_OUTPUT_FORMAT_NAMES,
        default_value=DEFAULT_OUTPUT_FORMAT_NAME,
        config=True,
        help="The export format to be used "
             "(can't be one from extra_document_configs!)")

    kernel_debug = Bool(
        False,
        config=True,
        help="""Whether to output kernel messages to the (debug) log""")

    timeout = Integer(10,
                      config=True,
                      help="timeout for individual code executions")

    # Things for the parser...
    chunk_begin = CRegExpMultiline(
        r'^\s*```+\s*{[.]?(?P<engine>[a-z]+)\s*(?P<args>.*)}\s*$',
        config=True,
        help="chunk begin regex (must include the named "
        "group 'engine' and 'args'")
    chunk_end = CRegExpMultiline(r'^\s*```+\s*$',
                                 config=True,
                                 help="chunk end regex")
    inline_code = CRegExpMultiline(
        r'`(?P<engine>[a-z]+) +([^`]+)\s*`',
        config=True,
        help="inline code regex (must include a named group 'engine')")
    comment_line = CRegExp(r'^\s*#', config=True, help="comment line regex")
    yaml_separator = CRegExpMultiline(r"^---\s*$",
                                      config=True,
                                      help="separator for the yaml metadata")

    def __init__(self, **kwargs):
        super(Knitpy, self).__init__(**kwargs)
        self.init_kernel_manager()
        self.init_engines()
        self.init_output_configurations()

    def init_kernel_manager(self):
        self._km = MultiKernelManager(log=self.log, parent=self)
        self._ksm = KernelSpecManager(log=self.log, parent=self)
        self._kernels = {}
        #ksm.find_kernel_specs()

    def init_engines(self):
        self._engines = {}
        self._engines["python"] = PythonKnitpyEngine(parent=self)
        # TODO: check that every kernel_name is in ksm.find_kernel_specs()

    def init_output_configurations(self):
        self._outputs = {}
        for config in DEFAULT_FINAL_OUTPUT_FORMATS:
            fod = FinalOutputConfiguration(parent=self, **config)
            self._outputs[config["name"]] = fod
            self._outputs[config["alias"]] = fod
        for config in self.extra_document_configs:
            fod = FinalOutputConfiguration(parent=self, **config)
            self._outputs[config["name"]] = fod
            self._outputs[config["alias"]] = fod

    def parse_document(self, input):
        if os.path.exists(input):
            filename = input
            with codecs.open(filename, 'r', 'UTF-8') as f:
                doc = f.read()
        else:
            doc = input
            filename = "anonymous_input"

        # the yaml can stay in the doc, pandoc will remove '---' blocks
        # pandoc will also do its own interpretation and use title/author and so on...
        # ToDo: not sure if that should stay or if we should start with clean metadata
        # title, author, date
        # title: "A first try"
        # author: "Jan Schulz"
        # date: "Monday, February 23, 2015"
        # default values
        metadata = {
            "title": filename,
            "author": getpass.getuser(),
            "date": datetime.datetime.now().strftime("%A, %B %d, %Y")
        }

        pos = 0
        start = self.yaml_separator.search(doc, pos)
        if start is not None:
            end = self.yaml_separator.search(doc, start.end())
            if end is None:
                raise ParseException("Found no metadata end separator.")
            try:
                # safe_load: document metadata must not run arbitrary YAML tags
                res = yaml.safe_load(doc[start.end():end.start()])
                self.log.debug("Metadata: %s", res)
                metadata.update(res)
            except Exception as e:
                raise ParseException("Malformed metadata: %s" % str(e))

        parsed_doc = self._parse_blocks(doc)
        return parsed_doc, metadata

    def _parse_blocks(self, doc):
        result = []
        doc_pos = 0
        blocks = self.chunk_begin.finditer(doc)
        for block_start in blocks:
            # process the text before the match
            text = doc[doc_pos:block_start.start()]
            self._parse_inline(text, result)
            # TODO: somehow an empty line before a codeblock vanishes, so add one here
            result.append((TTEXT, "\n"))
            # now the block itself
            # find the end of the block
            block_end = self.chunk_end.search(doc, block_start.end())
            if block_end is None:
                raise ParseException(
                    "Found no end for the block starting at pos %s" %
                    block_start.end())
            result.append((TBLOCK, (doc[block_start.end():block_end.start()],
                                    block_start.groupdict())))
            doc_pos = block_end.end()
        # text after the last block
        self._parse_inline(doc[doc_pos:], result)
        return result

    def _parse_inline(self, text, result):
        text_pos = 0
        for inline in self.inline_code.finditer(text):
            # text before inline code
            result.append((TTEXT, text[text_pos:inline.start()]))
            # inline code
            engine_offset = len(inline.group('engine')) + 1
            result.append(
                (TINLINE,
                 (text[inline.start() + engine_offset + 1:inline.end() - 1],
                  inline.groupdict())))
            text_pos = inline.end()
        # text after the last inline code
        result.append((TTEXT, text[text_pos:]))

    def _all_lines_comments(self, lines):
        for line in lines.split("\n"):
            if not self.comment_line.match(line):
                return False
        return True

    def convert(self, parsed, output):

        context = ExecutionContext(output=output)

        for entry in parsed:
            if entry[0] == TBLOCK:
                context.mode = "block"
                self._process_code(entry[1], context=context)
            elif entry[0] == TINLINE:
                context.mode = "inline"
                self._process_code(entry[1], context=context)
            elif entry[0] == TTEXT:
                output.add_text(entry[1])
            else:
                raise ParseException("Found something unexpected: %s" % entry)
        # process_code opened kernels, so close them here
        self._km.shutdown_all()
        # workaround for https://github.com/ipython/ipython/issues/8007
        # FIXME: remove if IPython >3.0 is in require
        self._km._kernels.clear()
        self._kernels = {}
        return output

    def _process_code(self, input, context):

        context.execution_started()

        # setup the execution context
        code = input[0]
        intro = input[1]
        engine_name = intro["engine"]
        raw_args = intro.get("args", "")

        args = self._parse_args(raw_args)

        # for compatibility with knitr, where python is specified via "{r engine='python'}"
        if "engine" in args:
            engine_name = args.pop("engine")
            self.log.debug("Running on engine: %s", engine_name)

        try:
            engine = self._engines[engine_name]
        except KeyError:
            raise ParseException("Unknown codeblock type: %s" % engine_name)
        assert engine is not None, "Engine is None"
        context.engine = engine
        if engine.name not in context.enabled_documents:
            plotting_formats = context.output.export_config.accepted_image_formats
            plot_code = engine.get_plotting_format_code(plotting_formats)
            self._run_silently(context.engine.kernel, plot_code)
            context.enabled_documents.append(engine.name)
            self.log.info("Enabled image formats '%s' in engine '%s'.",
                          plotting_formats, engine.name)

        # configure the context
        if "echo" in args:
            context.echo = args.pop("echo")

        # eval=False means that we don't execute the block at all
        if "eval" in args:
            _eval = args.pop("eval")
            if _eval is False:
                if context.echo:
                    code = code.replace(os.linesep, "\n").lstrip("\n")
                    context.output.add_code(code, language=engine.language)
                return

        if "results" in args:
            context.results = args.pop("results")

        if "include" in args:
            context.include = args.pop("include")

        if "chunk_label" in args:
            context.chunk_label = args.pop("chunk_label")
        else:
            context.chunk_label = u"unnamed-chunk-%s" % context.chunk_number

        if "comment" in args:
            context.comment = args.pop("comment")

        if args:
            self.log.debug("Found unhandled args: %s", args)

        lines = ''
        code_lines = code.split('\n')
        space_re = re.compile(r'^([\s]+)')
        spaces = []

        # TODO: this whole "merge with the next line" should be rewritten as a generator
        def loop_continues(line_no):
            if len(code_lines) <= line_no:
                return False

            candidate = code_lines[line_no]
            # comments should be swallowed if a line further down has code in it with the
            # right number of spaces in front
            while candidate.strip() == "" or self._all_lines_comments(
                    candidate):
                line_no += 1
                if len(code_lines) <= line_no:
                    return False
                candidate = code_lines[line_no]
            # the next code line must have either the same number of spaces (still in a loop),
            # fewer spaces than in 'spaces' (nested loop), or none (end of loop). If more
            # spaces or different types of spaces are found, this will result in an error
            # which will be shown when the code is executed...
            while spaces:
                possible_space = spaces[-1]
                if candidate[:len(possible_space)] == possible_space:
                    # ok, we are at the "right" level of space
                    return True
                # not our "space", so remove it and try the one one nesting above
                spaces.pop()
            return False

        for line_no in range(len(code_lines)):
            cur_line = code_lines[line_no]
            lines = lines + cur_line
            # check if we are in a loop and if so, if the next line also belongs to this loop.
            # This only catches the case where we are *in* a loop and not the loop start (the
            # line with a ':' in it); that line is caught by the is_complete call below. Nested
            # loops are also caught due to the spaces in front of them.
            m = space_re.match(cur_line)
            if m:
                cur_space = m.group(0)
                spaces.append(cur_space)
                if loop_continues(line_no + 1):
                    lines += "\n"
                    continue

            if spaces:
                # we are in a loop, as spaces has some spaces in it, but the code above didn't find
                # any spaces in front of the line -> this is the case when loop_continues found a
                # new codeline from this loop after a comment with different spaces in front of
                # it or an empty line. This could be such an empty/comment line and we have to
                # look at the next line as well!
                if cur_line.strip() == "" or self._all_lines_comments(
                        cur_line):
                    lines += "\n"
                    continue
            # we have a block of code, including all lines of a loop
            msg = engine.kernel.is_complete(lines + "\n\n")
            reply = engine.kernel.get_shell_msg(timeout=self.timeout)
            assert reply['msg_type'] == 'is_complete_reply', str(reply)
            if self.kernel_debug:
                self.log.debug("completion_request: %s", msg)
            if reply['content']['status'] == 'complete':
                if lines.strip() == "":
                    # No requests for "no code"
                    lines = ""
                    continue
                elif self._all_lines_comments(lines):
                    # comments should go to the next code block
                    lines += "\n"
                    continue
                # run the lines
                self._run_lines(lines + "\n", context)
                lines = ""
            elif reply['content']['status'] == 'invalid':
                # TODO: not sure how this should be handled
                # Either abort execution of the whole file or just retry with the next line?
                # However this should be handled via a user message
                self.log.info("Code invalid:\n%s", lines)
                context.output.add_code(lines, language=engine.language)
                context.output.add_execution_error("Code invalid")
                lines = ""
            else:
                # the "incomplete" case: don't run anything wait for the next line
                lines += "\n"

        # This can only happen if the last line is incomplete
        # This will always result in an error!
        if lines.strip() != "":
            self._run_lines(lines, context)

        context.execution_finished()

    def _parse_args(self, raw_args):
        # Todo: knitr interprets all values, so code references are possible
        # This also means that we have to do args parsing at interpretation time, so that
        # variables from other code can be taken into account..

        args = {}
        if raw_args.strip() == "":
            return args

        converter = {
            "True": True,
            "False": False,
            "None": None,
            "T": True,  # Rs True/False
            "F": False,
            "TRUE": True,
            "FALSE": False,
            # treat R's NA as None; probably overkill to look for float("NA")
            "NA": None,
            "NULL": None,
        }

        # The first is special as that can be the name of the chunk
        first = True
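        # e.g. raw_args = "my-label, echo=False, results='hide'" parses to
        #   {'chunk_label': 'my-label', 'echo': False, 'results': 'hide'}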
        for arg in raw_args.split(","):
            arg = arg.strip()
            if not "=" in arg:
                if not first:
                    raise ParseException(
                        "Malformed options for code chunk: '%s' in '%s'" %
                        (arg, raw_args))
                args["chunk_label"] = arg
                continue
            first = False
            label, value = arg.split("=")
            v = value.strip()
            # convert to real types.
            # TODO: Should be done by submitting the whole thing to the kernel, like knitr does
            # -> variables from one codecell can be used in the args of the next one ...
            if (v[0] == '"' and v[-1] == '"'):
                v = v[1:-1]
            elif (v[0] == "'" and v[-1] == "'"):
                v = v[1:-1]
            elif v in converter:
                v = converter[v]
            else:
                try:
                    v = int(v)
                except ValueError:
                    self.log.error(
                        "Could not decode option value: '%s=%s'. Discarded...",
                        label, v)
                    continue

            args[label.strip()] = v

        return args

    def _run_lines(self, lines, context):
        kernel = context.engine.kernel
        msg_id = kernel.execute(lines)
        if self.kernel_debug:
            self.log.debug("Executing lines (msg_id=%s):\n%s", msg_id, lines)
        # wait for finish, with timeout
        # First we have to wait until the kernel tells us it is finished running the code
        while True:
            try:
                msg = kernel.shell_channel.get_msg(timeout=self.timeout)
                if self.kernel_debug:
                    self.log.debug("shell msg: %s", msg)
            except Empty:
                # This indicates that something bad happened, as AFAIK this should return...
                self.log.error("Timeout waiting for execute reply")
                raise KnitpyException("Timeout waiting for execute reply.")
            if msg['parent_header'].get('msg_id') == msg_id:
                # It's finished, and we got our reply, so next look at the results
                break
            else:
                # not our reply
                self.log.debug(
                    "Discarding message from a different client: %s" % msg)
                continue

        # Now look at the results of our code execution and earlier completion requests
        # We handle messages until the kernel indicates it's idle again
        status_idle_again = False
        while True:
            try:
                msg = kernel.get_iopub_msg(timeout=self.timeout)
            except Empty:
                # There should be at least some messages: we just executed code!
                # The only valid time could be when the timeout happened too early (aka long
                # running code in the document) -> we handle that below
                self.log.warn("Timeout waiting for expected IOPub output")
                break

            if msg['parent_header'].get('msg_id') != msg_id:
                if msg['parent_header'].get(
                        u'msg_type') != u'is_complete_request':
                    # not an output from our execution and not one of the complete_requests
                    self.log.debug(
                        "Discarding output from a different client: %s" % msg)
                else:
                    # complete_requests are ok
                    pass
                continue

            # Here we have some message which corresponds to our code execution
            msg_type = msg['msg_type']
            content = msg['content']

            # The kernel indicates some status: executing -> idle
            if msg_type == 'status':
                if content['execution_state'] == 'idle':
                    # When idle, the kernel has executed all input
                    status_idle_again = True
                    break
                else:
                    # the "starting execution" messages
                    continue
            elif msg_type == 'clear_output':
                # we don't handle that!?
                self.log.debug(
                    "Discarding unexpected 'clear_output' message: %s" % msg)
                continue
            ## So, from here on we have a message with real content
            if self.kernel_debug:
                self.log.debug("iopub msg (%s): %s", msg_type, msg)
            if context.include:
                self._handle_return_message(msg, context)

        if not status_idle_again:
            self.log.error(
                "Code lines didn't execute in time. Don't use long-running code in "
                "documents or increase the timeout!")
            self.log.error("line(s): %s" % lines)

    def _handle_return_message(self, msg, context):
        if context.mode == "inline":
            #self.log.debug("inline: %s" % msg)
            if msg["msg_type"] == "execute_result":
                context.output.add_text(_plain_text(msg["content"]))
        elif context.mode == "block":
            #self.log.debug("block: %s" % msg)
            type = msg["msg_type"]
            if type == "execute_input":
                if context.echo:
                    context.output.add_code(_code(msg[u'content']),
                                            language=context.engine.language)
            elif type == "stream":
                # {u'text': u'a\nb\nc\n', u'name': u'stdout'}
                # TODO: format stdout and stderr differently?
                txt = msg["content"].get("text", "")
                if txt.strip() == "":
                    return
                if context.results == 'markup':
                    context.output.add_output(txt)
                elif context.results == 'asis':
                    context.output.add_asis(txt)
                elif context.results == 'hide':
                    return
                else:
                    # TODO: implement a caching system... again...
                    self.log.warn(
                        "Can't handle results='hold' yet, falling back to 'markup'."
                    )
                    context.output.add_output(txt)
            elif (type == "execute_result") or (type == "display_data"):
                if context.results == 'hide':
                    return
                if context.results == 'hold':
                    self.log.warn(
                        "Can't handle results='hold' yet, falling back to 'markup'."
                    )

                # Here we handle the output from the IPython display framework.
                # 1. If an object has a _display_ipython(), that will be called. This method
                #    should publish (one) display_data message and return -> the content ends
                #    up in the "display_data" msg and the "execute_result" has no data
                # 2. else try different IPython.core.formatters for the object, which basically
                #    call the right _repr_<whatever>_ method to get a formatted string in that
                #    mimetype. This is added as alternatives under content.data of the
                #    "execute_result".

                # data can hold the same content in multiple mimetypes
                data = msg[u"content"][u'data']
                #self.log.debug(str(data))

                # handle plots
                #self.log.debug("Accepted image mimetypes: %s", context.output.export_config.accepted_image_mimetypes)
                for mime_type in context.output.export_config.accepted_image_mimetypes:
                    mime_data = data.get(mime_type, None)
                    if mime_data is None:
                        self.log.debug("No image found: %s", mime_type)
                        continue
                    try:
                        self.log.debug("Trying to include image...")
                        context.output.add_image(mime_type,
                                                 mime_data,
                                                 title="")
                    except KnitpyOutputException as e:
                        self.log.info("Couldn't include image: %s", e)
                        continue
                    return

                # now try some marked up text formats
                for mime_type in context.output.markup_mimetypes:
                    mime_data = data.get(mime_type, None)
                    if mime_data is None:
                        continue
                    try:
                        self.log.debug("Trying to include markup text...")
                        context.output.add_markup_text(mime_type, mime_data)
                    except KnitpyOutputException as e:
                        self.log.info("Couldn't include markup text: %s", e)
                        continue
                    return

                # as a last resort, try plain text...
                if u'text/plain' in data:
                    txt = data.get(u"text/plain", "")
                    if txt != "":
                        if context.results == 'markup':
                            context.output.add_output(txt)
                            if txt[-1] != "\n":
                                context.output.add_output("\n")
                        elif context.results == 'asis':
                            context.output.add_asis(txt)
                            if txt[-1] != "\n":
                                context.output.add_asis("\n")

                        return

                # If we are here, we couldn't handle any of the more specific data types
                # and didn't find any output text
                excuse = "\n(Found data of type '{}', but couldn't handle it)\n"
                context.output.add_output(excuse.format(data.keys()))
            elif (type == "error"):
                ename = msg["content"].get("ename", "unknown exception")
                evalue = msg["content"].get("evalue",
                                            "unknown exception value")
                tb = msg["content"].get("traceback", "<not available>")
                if not is_string(tb):
                    # remove the first line...
                    tb = "\n".join(tb[1:])
                self.log.info(tb)
                # there are ANSI escape sequences in the traceback, which kill pandoc :-(
                if u"\x1b[1;32m" in tb:
                    tb = "!! traceback unavailable due to included color sequences;\n" \
                         "!! execute `%colors NoColor` once before this line to remove them!"
                context.output.add_execution_error("%s: %s" % (ename, evalue),
                                                   tb)
            else:
                self.log.debug("Ignored msg of type %s" % type)

    def _run_silently(self, kc, lines):
        try:
            msg_id = kc.execute(lines + "\n\n",
                                silent=self.kernel_debug,
                                store_history=False)
            self.log.debug("Executed silent code: %s", lines)
            reply = kc.get_shell_msg(timeout=self.timeout)
            assert reply['parent_header'].get(
                'msg_id') == msg_id, "Wrong reply! " + str(reply)
            if self.kernel_debug:
                self.log.debug("Silent code shell reply: %s", reply)
        except Empty:
            self.log.error("Code took too long:\n %s", lines)

        # now empty the iopub channel (there is at least a "starting" message)
        while True:
            try:
                msg = kc.get_iopub_msg(timeout=0.1)
                if self.kernel_debug:
                    self.log.debug("Silent code iopub msg: %s", msg)
            except Empty:
                break

    def _get_kernel(self, engine):
        kernel_name = engine.kernel_name
        kernel_startup_lines = engine.startup_lines

        if kernel_name not in self._kernels:
            self.log.info("Starting a new kernel: %s" % kernel_name)
            kernelid = self._km.start_kernel(kernel_name=kernel_name)
            #km.list_kernel_ids()
            kn = self._km.get_kernel(kernelid)
            kc = kn.client()
            self._kernels[kernel_name] = kc
            # now initialize the channels
            kc.start_channels()
            kc.wait_for_ready()
            self._run_silently(kc, kernel_startup_lines)
            self.log.info("Executed kernel startup lines for engine '%s'.",
                          engine.name)

        return self._kernels[kernel_name]

    def get_output_format(self, fmt_name, config=None):
        self._ensure_valid_output(fmt_name)
        fod = self._outputs.get(fmt_name).copy()
        # self.log.info("%s: %s", fmt_name, config)
        if not config:
            pass
        elif isinstance(config, dict):
            fod.update(**config)
        elif config == "default":
            # html_document: default
            pass
        else:
            self.log.error(
                "Unknown config for document '%s': '%s'. Ignored...", fmt_name,
                config)
        return fod

    def _knit(self, input, outputdir_name, final_format="html", config=None):
        """Internal function to aid testing"""

        parsed, metadata = self.parse_document(input)
        final_format = self.get_output_format(final_format, config=config)

        md_temp = TemporaryOutputDocument(fileoutputs=outputdir_name,
                                          export_config=final_format,
                                          log=self.log,
                                          parent=self)

        # get the temporary md file
        self.convert(parsed, md_temp)

        return md_temp.content

    def render(self, filename, output=None):
        """
        Convert the filename to the given output format(s)
        """
        # Export each document
        conversion_success = 0
        converted_docs = []

        # save here to change back after the conversion.
        orig_cwd = os.getcwd()
        needs_chdir = False

        # expand $HOME and so on...
        filename = expand_path(filename)
        filename = os.path.abspath(filename)
        self.log.info("Converting %s..." % filename)

        basedir = os.path.dirname(filename)
        basename = os.path.splitext(os.path.basename(filename))[0]

        # It's easier if we just change wd to the dir of the file
        if unicode_type(basedir) != py3compat.getcwd():
            os.chdir(basedir)
            needs_chdir = True
            self.log.info("Changing to working dir: %s" % basedir)
            filename = os.path.basename(filename)

        outputdir_name = os.path.splitext(basename)[0] + "_files"

        # parse the input document
        parsed, metadata = self.parse_document(filename)

        # get the output formats
        # order: kwarg overwrites default overwrites document
        output_formats = [self._outputs[self.default_export_format]]
        if output is None:
            self.log.debug("Converting to default output format [%s]!" %
                           self.default_export_format)
        elif output == "all":
            outputs = metadata.get("output", None)
            # if nothing is specified, we keep the default
            if outputs is None:
                self.log.debug(
                    "Did not find any specified output formats: using only default!"
                )
            else:
                output_formats = []
                for fmt_name, config in iteritems(outputs):
                    fod = self.get_output_format(fmt_name, config)
                    output_formats.append(fod)
                self.log.debug(
                    "Converting to all specified output formats: %s" %
                    [fmt.name for fmt in output_formats])
        else:
            self._ensure_valid_output(output)
            output_formats = [self._outputs[output]]

        for final_format in output_formats:
            self.log.info("Converting document %s to %s", filename,
                          final_format.name)
            # TODO: build a proper way to specify final output...

            md_temp = TemporaryOutputDocument(fileoutputs=outputdir_name,
                                              export_config=final_format,
                                              log=self.log,
                                              parent=self)

            # get the temporary md file
            self.convert(parsed, md_temp)
            if final_format.keep_md or self.keep_md:
                mdfilename = basename + "." + final_format.name + ".md"
                self.log.info("Saving the temporary markdown as '%s'." %
                              mdfilename)
                # TODO: remove the first yaml metadata block and
                # put "#<title>\n<author>\n<date>" before the rest
                with codecs.open(mdfilename, 'w+b', 'UTF-8') as f:
                    f.write(md_temp.content)

            # convert the md file to the final filetype
            input_format = "markdown" \
                           "+autolink_bare_uris" \
                           "+ascii_identifiers" \
                           "+tex_math_single_backslash-implicit_figures" \
                           "+fenced_code_attributes"

            extra = [
                "--smart",  # typographically correct output (curly quotes, etc)
                "--email-obfuscation",
                "none",  #do not obfuscation email names with javascript
                "--self-contained",  # include img/scripts as data urls
                "--standalone",  # html with header + footer
                "--section-divs",
            ]

            outfilename = basename + "." + final_format.file_extension

            # exported is irrelevant, as we pass in a filename
            exported = pandoc(source=md_temp.content,
                              to=final_format.pandoc_export_format,
                              format=input_format,
                              extra_args=extra,
                              outputfile=outfilename)
            self.log.info("Written final output: %s" % outfilename)
            converted_docs.append(os.path.join(basedir, outfilename))
        if needs_chdir:
            os.chdir(orig_cwd)
        return converted_docs

    def _ensure_valid_output(self, fmt_name):
        if fmt_name in self._outputs:
            return
        raise KnitpyException("Format '%s' is not a valid output format!" %
                              fmt_name)
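
A minimal sketch of the parser regexes defined at the top of ``Knitpy``, applied to a tiny pymd document (the document text is illustrative):

import re

chunk_begin = re.compile(
    r'^\s*```+\s*{[.]?(?P<engine>[a-z]+)\s*(?P<args>.*)}\s*$', re.MULTILINE)
chunk_end = re.compile(r'^\s*```+\s*$', re.MULTILINE)

doc = "Intro\n```{python echo=False}\nprint('hi')\n```\nOutro\n"
start = chunk_begin.search(doc)
end = chunk_end.search(doc, start.end())
print(start.groupdict())                     # {'engine': 'python', 'args': 'echo=False'}
print(doc[start.end():end.start()].strip())  # print('hi')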
Code example #10
class PBSPROLauncher(launcher.PBSLauncher):
    """A BatchSystemLauncher subclass for PBSPro."""
    job_array_regexp = CRegExp(r'#PBS\W+-J\W+[\w\d\-\$]+')
    job_array_template = Unicode('#PBS -J 1-{n}')