Example #1
0
 def testQuoteUnquoteLatin1(self):
     "A latin-1 encoded string should be unmodified through quote and unquote"
     raw = "R\xe9sum\xe9"
     encoded = "R%E9sum%E9"
     # Round trip: quote followed by unquote must return the original.
     self.assertEqual(raw, utils.unquote(utils.quote(raw)))
     # Each direction individually.
     self.assertEqual(utils.quote(raw), encoded)
     self.assertEqual(raw, utils.unquote(encoded))
     self.assertEqual(raw, utils.unquote(u"R%E9sum%E9"))
Example #2
0
    def read_stdout_stderr_files(self, run_dir):
        """
        Read the job's stdout and stderr files from run_dir and populate
        self._stdout_text and self._stderr_text with their quoted,
        possibly truncated, contents.

        :param run_dir: directory that holds the job's .out/.err files
        """
        # Leave headroom under MAX_OUTPUT_LENGTH for the quoting expansion.
        my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
        if self._output_file is None:
            # This is the case for SUBDAG jobs
            self._stdout_text = None
        else:
            basename = self._output_file
            if self._has_rotated_stdout_err_files:
                # Rotated files carry a 3-digit rotation suffix.
                basename += ".%03d" % (self._job_output_counter)

            my_out_file = os.path.join(run_dir, basename)

            try:
                # 'with' guarantees the handle is closed even if quoting
                # raises; the previous version leaked it in that case.
                with open(my_out_file, 'r') as out_fh:
                    contents = out_fh.read()
                if len(contents) > my_max_encoded_length:
                    contents = contents[:my_max_encoded_length]
                self._stdout_text = utils.quote("#@ 1 stdout\n" + contents)
            except IOError:
                self._stdout_text = None
                if not self.is_noop_job():
                    logger.warning(
                        "unable to read output file: %s, continuing..." %
                        (my_out_file))

        if self._error_file is None:
            # This is the case for SUBDAG jobs
            self._stderr_text = None
        else:
            basename = self._error_file
            if self._has_rotated_stdout_err_files:
                basename += ".%03d" % (self._job_output_counter)

            my_err_file = os.path.join(run_dir, basename)

            try:
                with open(my_err_file, 'r') as err_fh:
                    contents = err_fh.read()
                if len(contents) > my_max_encoded_length:
                    contents = contents[:my_max_encoded_length]
                self._stderr_text = utils.quote(contents)
            except IOError:
                self._stderr_text = None
                if not self.is_noop_job():
                    logger.warning(
                        "unable to read error file: %s, continuing..." %
                        (my_err_file))
Example #3
0
    def read_stdout_stderr_files(self, run_dir):
        """
        Read the job's stdout and stderr files from run_dir and populate
        self._stdout_text and self._stderr_text with their quoted,
        possibly truncated, contents.

        :param run_dir: directory that holds the job's .out/.err files
        """
        # Leave headroom under MAX_OUTPUT_LENGTH for quoting expansion.
        my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
        if self._output_file is None:
            # This is the case for SUBDAG jobs
            self._stdout_text = None
        else:
            basename = self._output_file
            if self._has_rotated_stdout_err_files:
                # Rotated files carry a 3-digit rotation suffix.
                basename += ".%03d" % (self._job_output_counter)

            my_out_file = os.path.join(run_dir, basename)

            try:
                # 'with' closes the handle even if quoting raises; the
                # previous version leaked it in that case.
                with open(my_out_file, 'r') as out_fh:
                    contents = out_fh.read()
                if len(contents) > my_max_encoded_length:
                    contents = contents[:my_max_encoded_length]
                self._stdout_text = utils.quote("#@ 1 stdout\n" + contents)
            except IOError:
                self._stdout_text = None
                if not self.is_noop_job():
                    logger.warning("unable to read output file: %s, continuing..." % (my_out_file))

        if self._error_file is None:
            # This is the case for SUBDAG jobs
            self._stderr_text = None
        else:
            basename = self._error_file
            if self._has_rotated_stdout_err_files:
                basename += ".%03d" % (self._job_output_counter)

            my_err_file = os.path.join(run_dir, basename)

            try:
                with open(my_err_file, 'r') as err_fh:
                    contents = err_fh.read()
                if len(contents) > my_max_encoded_length:
                    contents = contents[:my_max_encoded_length]
                self._stderr_text = utils.quote(contents)
            except IOError:
                self._stderr_text = None
                if not self.is_noop_job():
                    logger.warning("unable to read error file: %s, continuing..." % (my_err_file))
Example #4
0
    def read_job_error_file(self, store_monitoring_events=True):
        """
        Reads the job error file and updates job structures to store the
        stderr of the condor job and also attempts to parse the hostname
        from the stderr of the job

        :param store_monitoring_events: whether to store any parsed monitoring events in the job
        :return:
        """
        # Leave headroom under MAX_OUTPUT_LENGTH for quoting expansion.
        my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
        if self._error_file is None:
            # This is the case for SUBDAG jobs
            self._stderr_text = None
            return

        # Finally, read error file only
        run_dir = self._job_submit_dir
        basename = self.get_rotated_err_filename()
        my_err_file = os.path.join(run_dir, basename)

        try:
            # Context manager closes the handle even if parsing below
            # raises; the previous version leaked it in that case.
            with open(my_err_file) as err_fh:
                # PM-1274 parse any monitoring events such as integrity
                # related from PegasusLite .err file
                job_stderr = self.split_task_output(err_fh.read())
            buf = job_stderr.user_data

            if len(buf) > my_max_encoded_length:
                buf = buf[:my_max_encoded_length]
            self._stderr_text = utils.quote(buf)

            if store_monitoring_events:
                self._add_additional_monitoring_events(job_stderr.events)

            # PM-1355 attempt to determine the hostname from the pegasus lite job
            hostname_match = re_parse_pegasuslite_hostname.search(job_stderr.user_data)
            if hostname_match:
                # a match yes it is a PegasusLite job . gleam the hostname
                self._host_id = hostname_match.group(1)
                self._host_ip = hostname_match.group(2)

        except OSError:
            self._stderr_text = None
            if not self.is_noop_job():
                logger.warning(
                    "unable to read error file: %s, continuing..." % (my_err_file)
                )
Example #5
0
    def read_job_error_file(self, store_monitoring_events=True):
        """
        Reads the job error file and updates job structures to store the
        stderr of the condor job and also attempts to parse the hostname
        from the stderr of the job

        :param store_monitoring_events: whether to store any parsed monitoring events in the job
        :return:
        """
        # Leave headroom under MAX_OUTPUT_LENGTH for quoting expansion.
        my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
        if self._error_file is None:
            # This is the case for SUBDAG jobs
            self._stderr_text = None
            return

        # Finally, read error file only
        run_dir = self._job_submit_dir
        basename = self.get_rotated_err_filename()
        my_err_file = os.path.join(run_dir, basename)

        try:
            # Context manager closes the handle even if parsing below
            # raises; the previous version leaked it in that case.
            with open(my_err_file, 'r') as err_fh:
                # PM-1274 parse any monitoring events such as integrity
                # related from PegasusLite .err file
                job_stderr = self.split_task_output(err_fh.read())
            buf = job_stderr.user_data

            if len(buf) > my_max_encoded_length:
                buf = buf[:my_max_encoded_length]
            self._stderr_text = utils.quote(buf)

            if store_monitoring_events:
                self._add_additional_monitoring_events(job_stderr.events)

            # PM-1355 attempt to determine the hostname from the pegasus lite job
            hostname_match = re_parse_pegasuslite_hostname.search(job_stderr.user_data)
            if hostname_match:
                # a match yes it is a PegasusLite job . gleam the hostname
                self._host_id = hostname_match.group(1)

        except IOError:
            self._stderr_text = None
            if not self.is_noop_job():
                logger.warning("unable to read error file: %s, continuing..." % (my_err_file))
Example #6
0
    def read_job_out_file(self, out_file=None, store_monitoring_events=True):
        """
        Read the job's stdout file, store its quoted (possibly truncated)
        contents in self._stdout_text, and optionally record any
        monitoring events parsed from it.

        :param out_file: explicit path to the .out file; when None the
            path is derived from the job submit dir and exec job id
        :param store_monitoring_events: whether to store any parsed
            monitoring events in the job
        """
        # Leave headroom under MAX_OUTPUT_LENGTH for quoting expansion.
        my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
        if self._output_file is None:
            # This is the case for SUBDAG jobs
            self._stdout_text = None

        if out_file is None:
            # PM-1297 only construct relative path if out_file is not explicitly specified
            run_dir = self._job_submit_dir

            # PM-1157 output file has absolute path from submit file
            # interferes with replay mode on another directory
            basename = self._exec_job_id + ".out"
            if self._has_rotated_stdout_err_files:
                # Rotated files carry a 3-digit rotation suffix.
                basename += ".%03d" % (self._job_output_counter)
            out_file = os.path.join(run_dir, basename)

        try:
            # 'with' closes the handle even if parsing/quoting raises;
            # the previous version leaked it in that case.
            with open(out_file, 'r') as out_fh:
                job_stdout = self.split_task_output(out_fh.read())
            buf = job_stdout.user_data
            if len(buf) > my_max_encoded_length:
                buf = buf[:my_max_encoded_length]
            self._stdout_text = utils.quote("#@ 1 stdout\n" + buf)

            if store_monitoring_events:
                self._add_additional_monitoring_events(job_stdout.events)
        except IOError:
            self._stdout_text = None
            if not self.is_noop_job():
                logger.warning(
                    "unable to read output file: %s, continuing..." %
                    (out_file))
Example #7
0
    def read_job_out_file(self, out_file=None, store_monitoring_events=True):
        """
        Read the job's stdout file, store its quoted (possibly truncated)
        contents in self._stdout_text, and optionally record any
        monitoring events parsed from it.

        :param out_file: explicit path to the .out file; when None the
            path is derived from the job submit dir and exec job id
        :param store_monitoring_events: whether to store any parsed
            monitoring events in the job
        """
        # Leave headroom under MAX_OUTPUT_LENGTH for quoting expansion.
        my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
        if self._output_file is None:
            # This is the case for SUBDAG jobs
            self._stdout_text = None

        if out_file is None:
            # PM-1297 only construct relative path if out_file is not explicitly specified
            run_dir = self._job_submit_dir

            # PM-1157 output file has absolute path from submit file
            # interferes with replay mode on another directory
            basename = self._exec_job_id + ".out"
            if self._has_rotated_stdout_err_files:
                # Rotated files carry a 3-digit rotation suffix.
                basename += ".%03d" % (self._job_output_counter)
            out_file = os.path.join(run_dir, basename)

        try:
            # 'with' closes the handle even if parsing/quoting raises;
            # the previous version leaked it in that case.
            with open(out_file, 'r') as out_fh:
                job_stdout = self.split_task_output(out_fh.read())
            buf = job_stdout.user_data
            if len(buf) > my_max_encoded_length:
                buf = buf[:my_max_encoded_length]
            self._stdout_text = utils.quote("#@ 1 stdout\n" + buf)

            if store_monitoring_events:
                self._add_additional_monitoring_events(job_stdout.events)
        except IOError:
            self._stdout_text = None
            if not self.is_noop_job():
                logger.warning("unable to read output file: %s, continuing..." % (out_file))
Example #8
0
    def read_job_error_file(self, store_monitoring_events=True):
        """
        Read the job's .err file and store its quoted (possibly
        truncated) contents in self._stderr_text.

        :param store_monitoring_events: whether to store any parsed monitoring events in the job
        :return:
        """
        # Leave headroom under MAX_OUTPUT_LENGTH for quoting expansion.
        my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
        if self._error_file is None:
            # This is the case for SUBDAG jobs
            self._stderr_text = None
            return

        # Finally, read error file only
        run_dir = self._job_submit_dir
        basename = self.get_rotated_err_filename()
        my_err_file = os.path.join(run_dir, basename)

        try:
            # Context manager closes the handle even if parsing below
            # raises; the previous version leaked it in that case.
            with open(my_err_file, 'r') as err_fh:
                # PM-1274 parse any monitoring events such as integrity
                # related from PegasusLite .err file
                job_stderr = self.split_task_output(err_fh.read())
            buf = job_stderr.user_data
            if len(buf) > my_max_encoded_length:
                buf = buf[:my_max_encoded_length]
            self._stderr_text = utils.quote(buf)

            if store_monitoring_events:
                self._add_additional_monitoring_events(job_stderr.events)
        except IOError:
            self._stderr_text = None
            if not self.is_noop_job():
                logger.warning("unable to read error file: %s, continuing..." %
                               (my_err_file))
Example #9
0
    def extract_job_info(self, kickstart_output):
        """
        This function reads the output from the kickstart parser and
        extracts the job information for the Stampede schema. It first
        looks for an invocation record, and then for a clustered
        record.

        Returns None if an error occurs, True if an invocation record
        was found, and False if it wasn't.
        """

        # Check if we have anything
        if len(kickstart_output) == 0:
            return None

        # Kickstart was parsed
        self._kickstart_parsed = True

        # PM-1157 we construct run dir from job submit dir
        run_dir = self._job_submit_dir

        # Let's try to find an invocation record...
        my_invocation_found = False
        my_task_number = 0
        self._stdout_text = ""  # Initialize stdout
        stdout_text_list = []
        stdout_size = 0
        for my_record in kickstart_output:
            if "invocation" not in my_record:
                # Not this one... skip to the next
                continue
            # Ok, we have an invocation record, extract the information we
            # need. Note that this may overwrite information obtained from
            # the submit file (e.g. the site_name).

            # Increment task_number
            my_task_number = my_task_number + 1

            if not my_invocation_found:
                # Things we only need to do once
                if "resource" in my_record:
                    self._site_name = my_record["resource"]
                if "user" in my_record:
                    self._remote_user = my_record["user"]
                if "cwd" in my_record:
                    self._remote_working_dir = my_record["cwd"]
                if "hostname" in my_record:
                    self._host_id = my_record["hostname"]

                # We are done with this part
                my_invocation_found = True

            # PM-1109 encode signal information if it exists
            signal_message = " "
            if "signalled" in my_record:
                # construct our own error message
                attrs = my_record["signalled"]
                signal_message = "Job was "
                if "action" in attrs:
                    signal_message += attrs["action"]
                if "signal" in attrs:
                    signal_message += " with signal " + attrs["signal"]

            # PM-641 optimization Modified string concatenation to a list join
            if "stdout" in my_record:
                # PM-1152 we always attempt to store upto MAX_OUTPUT_LENGTH
                stdout = self.get_snippet_to_populate(my_record["stdout"],
                                                      my_task_number,
                                                      stdout_size, "stdout")
                if stdout is not None:
                    try:
                        stdout_text_list.append(
                            utils.quote("#@ %d stdout\n" % (my_task_number)))
                        stdout_text_list.append(utils.quote(stdout))
                        stdout_text_list.append(utils.quote("\n"))
                        # NOTE(review): the +20 presumably reserves room for
                        # the "#@ ..." header and trailing newline — confirm.
                        stdout_size += len(stdout) + 20
                    except KeyError:
                        logger.exception(
                            "Unable to parse stdout section from kickstart record for task %s from file %s "
                            %
                            (my_task_number, self.get_rotated_out_filename()))

            if "stderr" in my_record:
                # Note: we are populating task stderr from kickstart record to job stdout only
                stderr = self.get_snippet_to_populate(
                    signal_message + my_record["stderr"], my_task_number,
                    stdout_size, "stderr")
                if stderr is not None:
                    try:
                        stdout_text_list.append(
                            utils.quote("#@ %d stderr\n" % (my_task_number)))
                        stdout_text_list.append(utils.quote(stderr))
                        stdout_text_list.append(utils.quote("\n"))
                        stdout_size += len(stderr) + 20
                    except KeyError:
                        logger.exception(
                            "Unable to parse stderr section from kickstart record for task %s from file %s "
                            %
                            (my_task_number, self.get_rotated_out_filename()))

        if len(stdout_text_list) > 0:
            # PM-641 pieces are already quoted above; a single join avoids
            # quadratic string concatenation.
            self._stdout_text = "".join(stdout_text_list)

        if not my_invocation_found:
            logger.debug("cannot find invocation record in output")

        # Look for clustered record...
        my_cluster_found = False
        for my_record in kickstart_output:
            if "clustered" not in my_record:
                # Not this one... skip to the next
                continue
            # Ok found it, fill in cluster parameters
            if "duration" in my_record:
                self._cluster_duration = my_record["duration"]
            if "start" in my_record:
                # Convert timestamp to EPOCH
                my_start = utils.epochdate(my_record["start"])
                if my_start is not None:
                    self._cluster_start_time = my_start
            # No need to look further...
            my_cluster_found = True
            break

        if not my_cluster_found:
            logger.debug("cannot find cluster record in output")

        # Finally, read error file only
        basename = self._exec_job_id + ".err"
        my_err_file = os.path.join(run_dir, basename)

        if my_invocation_found:
            # in my job output there were some invocation records
            # assume then that they are rotated also
            my_err_file = my_err_file + ".%03d" % (self._job_output_counter)

        try:
            # 'with' closes the handle even if quoting raises; the
            # previous version leaked it in that case.
            with open(my_err_file, 'r') as err_fh:
                self._stderr_text = utils.quote(err_fh.read())
        except IOError:
            self._stderr_text = None
            if not self.is_noop_job():
                logger.warning("unable to read error file: %s, continuing..." %
                               (my_err_file))

        # Done populating Job class with information from the output file
        return my_invocation_found
Example #10
0
    def read_stdout_stderr_files(self, run_dir=None):
        """
        Read the job's stdout and stderr files and populate
        self._stdout_text and self._stderr_text with their quoted,
        possibly truncated, contents.

        :param run_dir: directory that holds the job's .out/.err files;
            when None, the job submit directory is used
        """
        # Leave headroom under MAX_OUTPUT_LENGTH for quoting expansion.
        my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000

        if run_dir is None:
            # PM-1157 pick from the job submit directory associated with the job
            run_dir = self._job_submit_dir

        if self._output_file is None:
            # This is the case for SUBDAG jobs
            self._stdout_text = None
        else:
            # PM-1157 output file has absolute path from submit file
            # interferes with replay mode on another directory
            basename = self._exec_job_id + ".out"
            if self._has_rotated_stdout_err_files:
                # Rotated files carry a 3-digit rotation suffix.
                basename += ".%03d" % (self._job_output_counter)

            my_out_file = os.path.join(run_dir, basename)

            try:
                # 'with' closes the handle even if quoting raises; the
                # previous version leaked it in that case.
                with open(my_out_file, 'r') as out_fh:
                    contents = out_fh.read()
                if len(contents) > my_max_encoded_length:
                    contents = contents[:my_max_encoded_length]
                self._stdout_text = utils.quote("#@ 1 stdout\n" + contents)
            except IOError:
                self._stdout_text = None
                if not self.is_noop_job():
                    logger.warning(
                        "unable to read output file: %s, continuing..." %
                        (my_out_file))

        if self._error_file is None:
            # This is the case for SUBDAG jobs
            self._stderr_text = None
        else:
            basename = self._exec_job_id + ".err"
            if self._has_rotated_stdout_err_files:
                basename += ".%03d" % (self._job_output_counter)

            my_err_file = os.path.join(run_dir, basename)

            try:
                with open(my_err_file, 'r') as err_fh:
                    contents = err_fh.read()
                if len(contents) > my_max_encoded_length:
                    contents = contents[:my_max_encoded_length]
                self._stderr_text = utils.quote(contents)
            except IOError:
                self._stderr_text = None
                if not self.is_noop_job():
                    logger.warning(
                        "unable to read error file: %s, continuing..." %
                        (my_err_file))
Example #11
0
    def testQuote(self):
        "Quoting should replace non-printing characters with XML character entity references"
        self.assertEqual(utils.quote("hello\r\n\t"), "hello%0D%0A%09")

        # Control characters are always percent-encoded.
        for code in range(0x00, 0x20):
            self.assertEqual(utils.quote(chr(code)), "%%%02X" % code)

        # Printable ASCII passes through untouched, except the specials.
        for code in range(0x20, 0x7F):
            ch = chr(code)
            if ch not in "'\"%":
                self.assertEqual(utils.quote(ch), ch)

        # High bytes are percent-encoded.
        for code in range(0x7F, 0xFF):
            self.assertEqual(utils.quote(chr(code)), "%%%02X" % code)

        # The three specials get their own escapes.
        self.assertEqual(utils.quote("%"), "%25")
        self.assertEqual(utils.quote("'"), "%27")
        self.assertEqual(utils.quote('"'), "%22")

        self.assertEqual(utils.quote("Hello\nWorld!\n"), "Hello%0AWorld!%0A")
        # Unicode input is UTF-8 encoded before quoting.
        self.assertEqual(utils.quote("Zoë"), "Zo%C3%AB")
        self.assertEqual(utils.quote(u"Zoë"), "Zo%C3%AB")
        self.assertEqual(utils.quote(u"Zo\xeb"), "Zo%C3%AB")
        self.assertEqual(utils.quote("warning: unused variable ‘Narr’"),
                         "warning: unused variable %E2%80%98Narr%E2%80%99")
        self.assertEqual(utils.quote(u"warning: unused variable ‘Narr’"),
                         "warning: unused variable %E2%80%98Narr%E2%80%99")
        self.assertEqual(utils.quote(u"warning: unused variable \u2018Narr\u2019"),
                         "warning: unused variable %E2%80%98Narr%E2%80%99")
Example #12
0
 def testQuoteInvalidChars(self):
     "Invalid UTF-8 byte strings should not cause quote to fail"
     # (raw byte string, expected quoting) pairs covering valid and
     # invalid UTF-8 sequences of every length.
     cases = [
         ("\x80", "%80"),                       # Invalid 1 Octet Sequence
         ("\xc3\x28", "%C3("),                  # Invalid 2 Octet Sequence
         ("\xa0\xa1", "%A0%A1"),                # Invalid Sequence Identifier
         ("\xe2\x82\xa1", "%E2%82%A1"),         # Valid 3 Octet Sequence
         ("\xe2\x28\xa1", "%E2(%A1"),           # Invalid 3 Octet Sequence (in 2nd Octet)
         ("\xe2\x82\x28", "%E2%82("),           # Invalid 3 Octet Sequence (in 3rd Octet)
         ("\xf0\x90\x8c\xbc", "%F0%90%8C%BC"),  # Valid 4 Octet Sequence
         ("\xf0\x28\x8c\xbc", "%F0(%8C%BC"),    # Invalid 4 Octet Sequence (in 2nd Octet)
         ("\xf0\x90\x28\xbc", "%F0%90(%BC"),    # Invalid 4 Octet Sequence (in 3rd Octet)
         ("\xf0\x28\x8c\x28", "%F0(%8C("),      # Invalid 4 Octet Sequence (in 4th Octet)
         ("\xf8\xa1\xa1\xa1\xa1", "%F8%A1%A1%A1%A1"),         # Valid 5 Octet Sequence (but not Unicode!)
         ("\xfc\xa1\xa1\xa1\xa1\xa1", "%FC%A1%A1%A1%A1%A1"),  # Valid 6 Octet Sequence (but not Unicode!)
     ]
     for raw, expected in cases:
         self.assertEqual(utils.quote(raw), expected)
Example #13
0
 def testQuoteUnquoteUnicode(self):
     "Unicode strings should be utf-8 encoded when passed through quote"
     # ë (U+00EB) becomes the two UTF-8 bytes C3 AB, each percent-encoded.
     quoted = utils.quote(u"Zo\xeb")
     self.assertEqual(quoted, "Zo%C3%AB")
Example #14
0
    def extract_job_info(self, kickstart_output):
        """
        This function reads the output from the kickstart parser and
        extracts the job information for the Stampede schema. It first
        looks for an invocation record, and then for a clustered
        record.

        :param kickstart_output: list of parsed kickstart records (dicts)
        Returns None if an error occurs, True if an invocation record
        was found, and False if it wasn't.
        """

        # Check if we have anything
        if len(kickstart_output) == 0:
            return None

        # Kickstart was parsed
        self._kickstart_parsed = True

        # PM-1157 we construct run dir from job submit dir
        run_dir = self._job_submit_dir

        # Let's try to find an invocation record...
        my_invocation_found = False
        my_task_number = 0
        self._stdout_text = "" # Initialize stdout
        stdout_text_list = []
        stdout_size=0
        for my_record in kickstart_output:
            if not "invocation" in my_record:
                # Not this one... skip to the next
                continue
            # Ok, we have an invocation record, extract the information we
            # need. Note that this may overwrite information obtained from
            # the submit file (e.g. the site_name).

            # Increment task_number
            my_task_number = my_task_number + 1

            if not my_invocation_found:
                # Things we only need to do once (first invocation record wins)
                if "resource" in my_record:
                    self._site_name = my_record["resource"]
                if "user" in my_record:
                    self._remote_user = my_record["user"]
                if "cwd" in my_record:
                    self._remote_working_dir = my_record["cwd"]
                if "hostname" in my_record:
                    self._host_id = my_record["hostname"]

                # We are done with this part
                my_invocation_found = True

            # PM-1109 encode signal information if it exists
            signal_message = " "
            if "signalled" in my_record:
                # construct our own error message
                attrs = my_record["signalled"]
                signal_message = "Job was "
                if "action" in attrs:
                    signal_message += attrs["action"]
                if "signal" in attrs:
                    signal_message += " with signal " + attrs["signal"]

            #PM-641 optimization Modified string concatenation to a list join 
            if "stdout" in my_record:
                task_output = self.split_task_output( my_record["stdout"])
                self._add_additional_monitoring_events(task_output.events)
                # PM-1152 we always attempt to store upto MAX_OUTPUT_LENGTH
                stdout = self.get_snippet_to_populate( task_output.user_data, my_task_number, stdout_size, "stdout")
                if stdout is not None:
                    try:
                        stdout_text_list.append(utils.quote("#@ %d stdout\n" % (my_task_number)))
                        stdout_text_list.append(utils.quote(stdout))
                        stdout_text_list.append(utils.quote("\n"))
                        # NOTE(review): the +20 presumably reserves room for the
                        # "#@ ..." header and trailing newline — confirm.
                        stdout_size += len(stdout) + 20
                    except KeyError:
                        logger.exception( "Unable to parse stdout section from kickstart record for task %s from file %s " %(my_task_number, self.get_rotated_out_filename() ))

            if "stderr" in my_record:
                task_error = self.split_task_output(my_record["stderr"])
                # add the events to those retrieved from the application stderr
                self._add_additional_monitoring_events(task_error.events)
                # Note: we are populating task stderr from kickstart record to job stdout only
                stderr = self.get_snippet_to_populate( signal_message + task_error.user_data, my_task_number, stdout_size, "stderr")
                if stderr is not None:
                    try:
                        stdout_text_list.append(utils.quote("#@ %d stderr\n" % (my_task_number)))
                        stdout_text_list.append(utils.quote(stderr))
                        stdout_text_list.append(utils.quote("\n"))
                        stdout_size += len( stderr ) + 20
                    except KeyError:
                        logger.exception( "Unable to parse stderr section from kickstart record for task %s from file %s " %(my_task_number, self.get_rotated_out_filename() ))

        if len(stdout_text_list) > 0 :
            # PM-641 pieces were quoted individually above; a single join
            # avoids quadratic string concatenation.
            self._stdout_text = "".join(stdout_text_list)


            #PM-641 optimization merged encoding above
        # Now, we encode it!
#        if self._stdout_text != "":
#            self._stdout_text = utils.quote(self._stdout_text)


        if not my_invocation_found:
            logger.debug("cannot find invocation record in output")

        # Look for clustered record...
        my_cluster_found = False
        for my_record in kickstart_output:
            if not "clustered" in my_record:
                # Not this one... skip to the next
                continue
            # Ok found it, fill in cluster parameters
            if "duration" in my_record:
                self._cluster_duration = my_record["duration"]
            if "start" in my_record:
                # Convert timestamp to EPOCH
                my_start = utils.epochdate(my_record["start"])
                if my_start is not None:
                    self._cluster_start_time = my_start
            # No need to look further...
            my_cluster_found = True
            break

        if not my_cluster_found:
            logger.debug("cannot find cluster record in output")

        # Done populating Job class with information from the output file
        return my_invocation_found
Example #15
0
    def testQuote(self):
        "Quoting should replace non-printing characters with XML character entity references"
        self.assertEqual(utils.quote("hello\r\n\t"), "hello%0D%0A%09")

        # Control characters are always percent-encoded.
        for code in range(0x00, 0x20):
            self.assertEqual(utils.quote(chr(code)), "%%%02X" % code)

        # Printable ASCII passes through untouched, except the specials.
        for code in range(0x20, 0x7F):
            ch = chr(code)
            if ch not in "'\"%":
                self.assertEqual(utils.quote(ch), ch)

        # High bytes, supplied as single-byte byte strings, are encoded.
        for code in range(0x7F, 0xFF):
            single = code.to_bytes(length=1, byteorder="big")
            self.assertEqual(utils.quote(single), "%%%02X" % code)

        # The three specials get their own escapes.
        self.assertEqual(utils.quote("%"), "%25")
        self.assertEqual(utils.quote("'"), "%27")
        self.assertEqual(utils.quote('"'), "%22")

        self.assertEqual(utils.quote("Hello\nWorld!\n"), "Hello%0AWorld!%0A")
        # Unicode input is UTF-8 encoded before quoting.
        self.assertEqual(utils.quote("Zoë"), "Zo%C3%AB")
        self.assertEqual(utils.quote("Zoë"), "Zo%C3%AB")
        self.assertEqual(utils.quote("Zo\xeb"), "Zo%C3%AB")
        expected_narr = "warning: unused variable %E2%80%98Narr%E2%80%99"
        self.assertEqual(utils.quote("warning: unused variable ‘Narr’"), expected_narr)
        self.assertEqual(utils.quote("warning: unused variable ‘Narr’"), expected_narr)
        self.assertEqual(
            utils.quote("warning: unused variable \u2018Narr\u2019"), expected_narr
        )
Example #16
0
 def testQuoteInvalidChars(self):
     "Invalid UTF-8 byte strings should not cause quote to fail"
     # (raw bytes, expected percent-encoding) pairs covering valid and
     # invalid UTF-8 sequences of increasing octet length.
     cases = [
         (b"\x80", "%80"),  # Invalid 1 Octet Sequence
         (b"\xc3\x28", "%C3("),  # Invalid 2 Octet Sequence
         (b"\xa0\xa1", "%A0%A1"),  # Invalid Sequence Identifier
         (b"\xe2\x82\xa1", "%E2%82%A1"),  # Valid 3 Octet Sequence
         (b"\xe2\x28\xa1", "%E2(%A1"),  # Invalid 3 Octet Sequence (in 2nd Octet)
         (b"\xe2\x82\x28", "%E2%82("),  # Invalid 3 Octet Sequence (in 3rd Octet)
         (b"\xf0\x90\x8c\xbc", "%F0%90%8C%BC"),  # Valid 4 Octet Sequence
         (b"\xf0\x28\x8c\xbc", "%F0(%8C%BC"),  # Invalid 4 Octet Sequence (in 2nd Octet)
         (b"\xf0\x90\x28\xbc", "%F0%90(%BC"),  # Invalid 4 Octet Sequence (in 3rd Octet)
         (b"\xf0\x28\x8c\x28", "%F0(%8C("),  # Invalid 4 Octet Sequence (in 4th Octet)
         (b"\xf8\xa1\xa1\xa1\xa1", "%F8%A1%A1%A1%A1"),  # Valid 5 Octet Sequence (but not Unicode!)
         (b"\xfc\xa1\xa1\xa1\xa1\xa1", "%FC%A1%A1%A1%A1%A1"),  # Valid 6 Octet Sequence (but not Unicode!)
     ]
     for raw, expected in cases:
         self.assertEqual(utils.quote(raw), expected)
Example #17
0
 def testQuoteUnquoteUnicode(self):
     "Unicode strings should be utf-8 encoded when passed through quote"
     # U+00EB (ë) encodes to the UTF-8 pair 0xC3 0xAB, hence two escapes.
     quoted = utils.quote("Zo\xeb")
     self.assertEqual(quoted, "Zo%C3%AB")
Example #18
0
 def testQuoteUnquoteLatin1(self):
     "A latin-1 encoded string should be unmodified through quote and unquote"
     original = "R\xe9sum\xe9"
     # Round trip must be the identity...
     self.assertEqual(original, utils.unquote(utils.quote(original)))
     # ...and each direction must produce the expected encoded/decoded form.
     self.assertEqual(utils.quote(original), "R%E9sum%E9")
     self.assertEqual(original, utils.unquote("R%E9sum%E9"))
     self.assertEqual(original, utils.unquote(u"R%E9sum%E9"))
Example #19
0
File: job.py Project: kyo19/pegasus
    def extract_job_info(self, run_dir, kickstart_output):
        """
        This function reads the output from the kickstart parser and
        extracts the job information for the Stampede schema. It first
        looks for an invocation record, and then for a clustered
        record.

        Returns None if an error occurs, True if an invocation record
        was found, and False if it wasn't.
        """

        # Check if we have anything
        if len(kickstart_output) == 0:
            return None

        # Kickstart was parsed
        self._kickstart_parsed = True

        # Let's try to find an invocation record...
        my_invocation_found = False
        my_task_number = 0
        self._stdout_text = ""  # Initialize stdout
        stdout_text_list = []
        stdout_size = 0
        for my_record in kickstart_output:
            if "invocation" not in my_record:
                # Not this one... skip to the next
                continue
            # Ok, we have an invocation record, extract the information we
            # need. Note that this may overwrite information obtained from
            # the submit file (e.g. the site_name).

            # Increment task_number
            my_task_number = my_task_number + 1

            if not my_invocation_found:
                # Things we only need to do once
                if "resource" in my_record:
                    self._site_name = my_record["resource"]
                if "user" in my_record:
                    self._remote_user = my_record["user"]
                if "cwd" in my_record:
                    self._remote_working_dir = my_record["cwd"]
                if "hostname" in my_record:
                    self._host_id = my_record["hostname"]

                # We are done with this part
                my_invocation_found = True

            # PM-641 optimization Modified string concatenation to a list join
            if "stdout" in my_record:
                # Only append while we stay within MAX_OUTPUT_LENGTH
                if len(my_record["stdout"]) <= MAX_OUTPUT_LENGTH - stdout_size:
                    stdout_text_list.append(utils.quote("#@ %d stdout\n" % (my_task_number)))
                    stdout_text_list.append(utils.quote(my_record["stdout"]))
                    stdout_text_list.append(utils.quote("\n"))
                    # +20 accounts for the "#@ N stdout" header overhead
                    stdout_size += len(my_record["stdout"]) + 20

            if "stderr" in my_record:
                # Note: task stderr is folded into the job's stdout text
                if len(my_record["stderr"]) <= MAX_OUTPUT_LENGTH - stdout_size:
                    stdout_text_list.append(utils.quote("#@ %d stderr\n" % (my_task_number)))
                    stdout_text_list.append(utils.quote(my_record["stderr"]))
                    stdout_text_list.append(utils.quote("\n"))
                    stdout_size += len(my_record["stderr"]) + 20

        if len(stdout_text_list) > 0:
            # PM-641: every piece was quoted on append, so a plain join
            # yields the final encoded stdout text
            self._stdout_text = "".join(stdout_text_list)

        if not my_invocation_found:
            logger.debug("cannot find invocation record in output")

        # Look for clustered record...
        my_cluster_found = False
        for my_record in kickstart_output:
            if "clustered" not in my_record:
                # Not this one... skip to the next
                continue
            # Ok found it, fill in cluster parameters
            if "duration" in my_record:
                self._cluster_duration = my_record["duration"]
            if "start" in my_record:
                # Convert timestamp to EPOCH
                my_start = utils.epochdate(my_record["start"])
                if my_start is not None:
                    self._cluster_start_time = my_start
            # No need to look further...
            my_cluster_found = True
            break

        if not my_cluster_found:
            logger.debug("cannot find cluster record in output")

        # Finally, read error file only
        my_err_file = os.path.join(run_dir, self._error_file)

        if my_invocation_found:
            # in my job output there were some invocation records
            # assume then that they are rotated also
            my_err_file = my_err_file + ".%03d" % (self._job_output_counter)

        try:
            # Context manager guarantees the handle is closed even when
            # read() raises (the original leaked the handle in that case)
            with open(my_err_file, 'r') as err_fh:
                self._stderr_text = utils.quote(err_fh.read())
        except IOError:
            self._stderr_text = None
            logger.warning("unable to read error file: %s, continuing..." % (my_err_file))

        # Done populating Job class with information from the output file
        return my_invocation_found
Example #20
0
    def extract_job_info(self, kickstart_output):
        """
        This function reads the output from the kickstart parser and
        extracts the job information for the Stampede schema. It first
        looks for an invocation record, and then for a clustered
        record.

        Returns None if an error occurs, True if an invocation record
        was found, and False if it wasn't.
        """

        # Check if we have anything
        if len(kickstart_output) == 0:
            return None

        # Kickstart was parsed
        self._kickstart_parsed = True

        # PM-1157 we construct run dir from job submit dir
        # NOTE(review): this bare attribute access is a no-op statement — it
        # neither binds a variable nor has a visible side effect here;
        # presumably a leftover from an edit. Confirm before removing.
        self._job_submit_dir

        # Let's try to find an invocation record...
        my_invocation_found = False
        my_task_number = 0
        self._stdout_text = ""  # Initialize stdout
        stdout_text_list = []   # quoted chunks joined at the end (PM-641)
        stdout_size = 0         # running budget against MAX_OUTPUT_LENGTH
        for my_record in kickstart_output:
            if "multipart" in my_record:
                # PM-1390 convert to integrity metrics
                logger.debug("Multipart record %s", my_record)
                self._add_multipart_events([my_record])
            elif not "invocation" in my_record:
                # Not this one... skip to the next
                logger.trace("Skipping %s", my_record)
                continue
            # Ok, we have an invocation record, extract the information we
            # need. Note that this may overwrite information obtained from
            # the submit file (e.g. the site_name).

            # Increment task_number
            my_task_number = my_task_number + 1

            if not my_invocation_found:
                # Things we only need to do once
                if "resource" in my_record:
                    self._site_name = my_record["resource"]
                if "user" in my_record:
                    self._remote_user = my_record["user"]
                if "cwd" in my_record:
                    self._remote_working_dir = my_record["cwd"]
                if "hostname" in my_record:
                    self._host_id = my_record["hostname"]

                # We are done with this part
                my_invocation_found = True

            # PM-1109 encode signal information if it exists
            signal_message = " "
            if "signalled" in my_record:
                # construct our own error message
                attrs = my_record["signalled"]
                signal_message = "Job was "
                if "action" in attrs:
                    signal_message += attrs["action"]
                if "signal" in attrs:
                    signal_message += " with signal " + attrs["signal"]

            # PM-641 optimization Modified string concatenation to a list join
            if "stdout" in my_record:
                # split_task_output separates monitoring events from user data
                # (exact contract defined elsewhere in this class)
                task_output = self.split_task_output(my_record["stdout"])
                self._add_additional_monitoring_events(task_output.events)
                # PM-1152 we always attempt to store upto MAX_OUTPUT_LENGTH
                stdout = self.get_snippet_to_populate(task_output.user_data,
                                                      my_task_number,
                                                      stdout_size, "stdout")
                if stdout is not None:
                    try:
                        stdout_text_list.append(
                            utils.quote("#@ %d stdout\n" % (my_task_number)))
                        stdout_text_list.append(utils.quote(stdout))
                        stdout_text_list.append(utils.quote("\n"))
                        # +20 covers the "#@ N stdout" header overhead
                        stdout_size += len(stdout) + 20
                    except KeyError:
                        logger.exception(
                            "Unable to parse stdout section from kickstart record for task %s from file %s "
                            %
                            (my_task_number, self.get_rotated_out_filename()))

            if "stderr" in my_record:
                task_error = self.split_task_output(my_record["stderr"])
                # add the events to those retrieved from the application stderr
                self._add_additional_monitoring_events(task_error.events)
                # Note: we are populating task stderr from kickstart record to job stdout only
                stderr = self.get_snippet_to_populate(
                    signal_message + task_error.user_data,
                    my_task_number,
                    stdout_size,
                    "stderr",
                )
                if stderr is not None:
                    try:
                        stdout_text_list.append(
                            utils.quote("#@ %d stderr\n" % (my_task_number)))
                        stdout_text_list.append(utils.quote(stderr))
                        stdout_text_list.append(utils.quote("\n"))
                        stdout_size += len(stderr) + 20
                    except KeyError:
                        logger.exception(
                            "Unable to parse stderr section from kickstart record for task %s from file %s "
                            %
                            (my_task_number, self.get_rotated_out_filename()))

            # PM-1398 pass cpu info
            if "cpu" in my_record:
                self._cpu_attribs = my_record["cpu"]

        if len(stdout_text_list) > 0:
            # each chunk was already quoted, so a plain join is the final text
            self._stdout_text = "".join(stdout_text_list)

            # PM-641 optimization merged encoding above
        # Now, we encode it!
        #        if self._stdout_text != "":
        #            self._stdout_text = utils.quote(self._stdout_text)

        if not my_invocation_found:
            logger.debug("cannot find invocation record in output")

        # Look for clustered record...
        my_cluster_found = False
        for my_record in kickstart_output:
            if not "clustered" in my_record:
                # Not this one... skip to the next
                continue
            # Ok found it, fill in cluster parameters
            if "duration" in my_record:
                self._cluster_duration = my_record["duration"]
            if "start" in my_record:
                # Convert timestamp to EPOCH
                my_start = utils.epochdate(my_record["start"])
                if my_start is not None:
                    self._cluster_start_time = my_start
            # No need to look further...
            my_cluster_found = True
            break

        if not my_cluster_found:
            logger.debug("cannot find cluster record in output")

        # Done populating Job class with information from the output file
        return my_invocation_found
Example #21
0
 def testQuote(self):
     "Quoting should escape non-printing characters"
     # assertEquals is a deprecated alias (removed in Python 3.12);
     # use the canonical assertEqual instead.
     self.assertEqual("hello%0D%0A%09", utils.quote("hello\r\n\t"))
Example #22
0
    def extract_job_info(self, run_dir, kickstart_output):
        """
        This function reads the output from the kickstart parser and
        extracts the job information for the Stampede schema. It first
        looks for an invocation record, and then for a clustered
        record.

        Returns None if an error occurs, True if an invocation record
        was found, and False if it wasn't.
        """

        # Check if we have anything
        if len(kickstart_output) == 0:
            return None

        # Kickstart was parsed
        self._kickstart_parsed = True

        # Let's try to find an invocation record...
        my_invocation_found = False
        my_task_number = 0
        self._stdout_text = ""  # Initialize stdout
        stdout_text_list = []
        stdout_size = 0
        for my_record in kickstart_output:
            if "invocation" not in my_record:
                # Not this one... skip to the next
                continue
            # Ok, we have an invocation record, extract the information we
            # need. Note that this may overwrite information obtained from
            # the submit file (e.g. the site_name).

            # Increment task_number
            my_task_number = my_task_number + 1

            if not my_invocation_found:
                # Things we only need to do once
                if "resource" in my_record:
                    self._site_name = my_record["resource"]
                if "user" in my_record:
                    self._remote_user = my_record["user"]
                if "cwd" in my_record:
                    self._remote_working_dir = my_record["cwd"]
                if "hostname" in my_record:
                    self._host_id = my_record["hostname"]

                # We are done with this part
                my_invocation_found = True

            # PM-641 optimization Modified string concatenation to a list join
            if "stdout" in my_record:
                # Only append while we stay within MAX_OUTPUT_LENGTH
                if len(my_record["stdout"]) <= MAX_OUTPUT_LENGTH - stdout_size:
                    try:
                        stdout_text_list.append(utils.quote("#@ %d stdout\n" % (my_task_number)))
                        stdout_text_list.append(utils.quote(my_record["stdout"]))
                        stdout_text_list.append(utils.quote("\n"))
                        # +20 accounts for the "#@ N stdout" header overhead
                        stdout_size += len(my_record["stdout"]) + 20
                    except KeyError:
                        logger.exception("Unable to parse stdout section from kickstart record for task %s from file %s " % (my_task_number, self.get_rotated_out_filename()))

            if "stderr" in my_record:
                # Note: task stderr is folded into the job's stdout text
                if len(my_record["stderr"]) <= MAX_OUTPUT_LENGTH - stdout_size:
                    try:
                        stdout_text_list.append(utils.quote("#@ %d stderr\n" % (my_task_number)))
                        stdout_text_list.append(utils.quote(my_record["stderr"]))
                        stdout_text_list.append(utils.quote("\n"))
                        stdout_size += len(my_record["stderr"]) + 20
                    except KeyError:
                        logger.exception("Unable to parse stderr section from kickstart record for task %s from file %s " % (my_task_number, self.get_rotated_out_filename()))

        if len(stdout_text_list) > 0:
            # PM-641: every piece was quoted on append, so a plain join
            # yields the final encoded stdout text
            self._stdout_text = "".join(stdout_text_list)

        if not my_invocation_found:
            logger.debug("cannot find invocation record in output")

        # Look for clustered record...
        my_cluster_found = False
        for my_record in kickstart_output:
            if "clustered" not in my_record:
                # Not this one... skip to the next
                continue
            # Ok found it, fill in cluster parameters
            if "duration" in my_record:
                self._cluster_duration = my_record["duration"]
            if "start" in my_record:
                # Convert timestamp to EPOCH
                my_start = utils.epochdate(my_record["start"])
                if my_start is not None:
                    self._cluster_start_time = my_start
            # No need to look further...
            my_cluster_found = True
            break

        if not my_cluster_found:
            logger.debug("cannot find cluster record in output")

        # Finally, read error file only
        my_err_file = os.path.join(run_dir, self._error_file)

        if my_invocation_found:
            # in my job output there were some invocation records
            # assume then that they are rotated also
            my_err_file = my_err_file + ".%03d" % (self._job_output_counter)

        try:
            # Context manager guarantees the handle is closed even when
            # read() raises (the original leaked the handle in that case)
            with open(my_err_file, 'r') as err_fh:
                self._stderr_text = utils.quote(err_fh.read())
        except IOError:
            self._stderr_text = None
            logger.warning("unable to read error file: %s, continuing..." % (my_err_file))

        # Done populating Job class with information from the output file
        return my_invocation_found
Example #23
0
 def testQuote(self):
     "Quoting should escape non-printing characters"
     # assertEquals is a deprecated alias (removed in Python 3.12);
     # use the canonical assertEqual instead.
     self.assertEqual("hello%0D%0A%09", utils.quote("hello\r\n\t"))