def testQuoteUnquoteLatin1(self):
    "A latin-1 encoded string should be unmodified through quote and unquote"
    self.assertEqual("R\xe9sum\xe9", utils.unquote(utils.quote("R\xe9sum\xe9")))
    self.assertEqual(utils.quote("R\xe9sum\xe9"), "R%E9sum%E9")
    self.assertEqual("R\xe9sum\xe9", utils.unquote("R%E9sum%E9"))
    self.assertEqual("R\xe9sum\xe9", utils.unquote(u"R%E9sum%E9"))
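# The round trip asserted above depends on unquote() reversing each %XX
# escape. A minimal sketch of that decoding step, assuming utils.unquote maps
# every %XX escape back to the single character with that code point (the real
# implementation may differ, e.g. in how it treats multi-byte sequences):
import re

def unquote_sketch(s):
    # replace each %XX escape with the character it encodes
    return re.sub(r"%([0-9A-Fa-f]{2})", lambda m: chr(int(m.group(1), 16)), s)

assert unquote_sketch("R%E9sum%E9") == "R\xe9sum\xe9"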
def read_stdout_stderr_files(self, run_dir):
    """
    This function reads both stdout and stderr files and populates
    these fields in the Job class.
    """
    my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
    if self._output_file is None:
        # This is the case for SUBDAG jobs
        self._stdout_text = None
    else:
        basename = self._output_file
        if self._has_rotated_stdout_err_files:
            basename += ".%03d" % (self._job_output_counter)
        my_out_file = os.path.join(run_dir, basename)
        try:
            OUT = open(my_out_file, 'r')
            buffer = OUT.read()
            if len(buffer) > my_max_encoded_length:
                buffer = buffer[:my_max_encoded_length]
            self._stdout_text = utils.quote("#@ 1 stdout\n" + buffer)
        except IOError:
            self._stdout_text = None
            if not self.is_noop_job():
                logger.warning("unable to read output file: %s, continuing..." % (my_out_file))
        else:
            OUT.close()

    if self._error_file is None:
        # This is the case for SUBDAG jobs
        self._stderr_text = None
    else:
        basename = self._error_file
        if self._has_rotated_stdout_err_files:
            basename += ".%03d" % (self._job_output_counter)
        my_err_file = os.path.join(run_dir, basename)
        try:
            ERR = open(my_err_file, 'r')
            buffer = ERR.read()
            if len(buffer) > my_max_encoded_length:
                buffer = buffer[:my_max_encoded_length]
            self._stderr_text = utils.quote(buffer)
        except IOError:
            self._stderr_text = None
            if not self.is_noop_job():
                logger.warning("unable to read error file: %s, continuing..." % (my_err_file))
        else:
            ERR.close()
def read_job_error_file(self, store_monitoring_events=True):
    """
    Reads the job error file and updates job structures to store the
    stderr of the condor job, and also attempts to parse the hostname
    from the stderr of the job.

    :param store_monitoring_events: whether to store any parsed monitoring events in the job
    :return:
    """
    my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
    if self._error_file is None:
        # This is the case for SUBDAG jobs
        self._stderr_text = None
        return

    # Finally, read error file only
    run_dir = self._job_submit_dir
    basename = self.get_rotated_err_filename()
    my_err_file = os.path.join(run_dir, basename)

    try:
        ERR = open(my_err_file)
        # PM-1274 parse any monitoring events such as integrity related
        # from PegasusLite .err file
        job_stderr = self.split_task_output(ERR.read())
        buf = job_stderr.user_data
        if len(buf) > my_max_encoded_length:
            buf = buf[:my_max_encoded_length]
        self._stderr_text = utils.quote(buf)

        if store_monitoring_events:
            self._add_additional_monitoring_events(job_stderr.events)

        # PM-1355 attempt to determine the hostname from the PegasusLite job
        hostname_match = re_parse_pegasuslite_hostname.search(job_stderr.user_data)
        if hostname_match:
            # a match: it is a PegasusLite job, so glean the hostname
            self._host_id = hostname_match.group(1)
            self._host_ip = hostname_match.group(2)
    except OSError:
        self._stderr_text = None
        if not self.is_noop_job():
            logger.warning("unable to read error file: %s, continuing..." % (my_err_file))
    else:
        ERR.close()
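# This version also records the worker node's IP address. The pattern
# re_parse_pegasuslite_hostname is defined elsewhere; below is a purely
# hypothetical stand-in to illustrate the two capture groups the code above
# relies on (group(1) -> hostname, group(2) -> IP). The real regex and the
# real PegasusLite stderr format may differ.
import re

re_parse_pegasuslite_hostname_sketch = re.compile(
    r"hostname:\s+(\S+)\s+IP:\s+(\S+)"   # assumed line format, not the real one
)

m = re_parse_pegasuslite_hostname_sketch.search("hostname: node01 IP: 10.0.0.5")
if m:
    host_id, host_ip = m.group(1), m.group(2)   # "node01", "10.0.0.5"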
def read_job_error_file(self, store_monitoring_events=True):
    """
    Reads the job error file and updates job structures to store the
    stderr of the condor job, and also attempts to parse the hostname
    from the stderr of the job.

    :param store_monitoring_events: whether to store any parsed monitoring events in the job
    :return:
    """
    my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
    if self._error_file is None:
        # This is the case for SUBDAG jobs
        self._stderr_text = None
        return

    # Finally, read error file only
    run_dir = self._job_submit_dir
    basename = self.get_rotated_err_filename()
    my_err_file = os.path.join(run_dir, basename)

    try:
        ERR = open(my_err_file, 'r')
        # PM-1274 parse any monitoring events such as integrity related
        # from PegasusLite .err file
        job_stderr = self.split_task_output(ERR.read())
        buf = job_stderr.user_data
        if len(buf) > my_max_encoded_length:
            buf = buf[:my_max_encoded_length]
        self._stderr_text = utils.quote(buf)

        if store_monitoring_events:
            self._add_additional_monitoring_events(job_stderr.events)

        # PM-1355 attempt to determine the hostname from the PegasusLite job
        hostname_match = re_parse_pegasuslite_hostname.search(job_stderr.user_data)
        if hostname_match:
            # a match: it is a PegasusLite job, so glean the hostname
            self._host_id = hostname_match.group(1)
    except IOError:
        self._stderr_text = None
        if not self.is_noop_job():
            logger.warning("unable to read error file: %s, continuing..." % (my_err_file))
    else:
        ERR.close()
def read_job_out_file(self, out_file=None, store_monitoring_events=True):
    """
    This function reads both stdout and stderr files and populates
    these fields in the Job class.
    """
    my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
    if self._output_file is None:
        # This is the case for SUBDAG jobs
        self._stdout_text = None

    if out_file is None:
        # PM-1297 only construct relative path if out_file is not explicitly specified
        run_dir = self._job_submit_dir
        # PM-1157 output file has absolute path from submit file
        # interferes with replay mode on another directory
        # basename = self._output_file
        basename = self._exec_job_id + ".out"
        if self._has_rotated_stdout_err_files:
            basename += ".%03d" % (self._job_output_counter)
        out_file = os.path.join(run_dir, basename)

    try:
        OUT = open(out_file, 'r')
        job_stdout = self.split_task_output(OUT.read())
        buf = job_stdout.user_data
        if len(buf) > my_max_encoded_length:
            buf = buf[:my_max_encoded_length]
        self._stdout_text = utils.quote("#@ 1 stdout\n" + buf)

        if store_monitoring_events:
            self._add_additional_monitoring_events(job_stdout.events)
    except IOError:
        self._stdout_text = None
        if not self.is_noop_job():
            logger.warning("unable to read output file: %s, continuing..." % (out_file))
    else:
        OUT.close()
def read_job_error_file(self, store_monitoring_events=True):
    """
    Reads the job error file and stores the stderr of the condor job.

    :param store_monitoring_events: whether to store any parsed monitoring events in the job
    :return:
    """
    my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
    if self._error_file is None:
        # This is the case for SUBDAG jobs
        self._stderr_text = None
        return

    # Finally, read error file only
    run_dir = self._job_submit_dir
    basename = self.get_rotated_err_filename()
    my_err_file = os.path.join(run_dir, basename)

    try:
        ERR = open(my_err_file, 'r')
        # PM-1274 parse any monitoring events such as integrity related
        # from PegasusLite .err file
        job_stderr = self.split_task_output(ERR.read())
        buf = job_stderr.user_data
        if len(buf) > my_max_encoded_length:
            buf = buf[:my_max_encoded_length]
        self._stderr_text = utils.quote(buf)

        if store_monitoring_events:
            self._add_additional_monitoring_events(job_stderr.events)
    except IOError:
        self._stderr_text = None
        if not self.is_noop_job():
            logger.warning("unable to read error file: %s, continuing..." % (my_err_file))
    else:
        ERR.close()
def extract_job_info(self, kickstart_output):
    """
    This function reads the output from the kickstart parser and
    extracts the job information for the Stampede schema. It first
    looks for an invocation record, and then for a clustered record.
    Returns None if an error occurs, True if an invocation record
    was found, and False if it wasn't.
    """
    # Check if we have anything
    if len(kickstart_output) == 0:
        return None

    # Kickstart was parsed
    self._kickstart_parsed = True

    # PM-1157 we construct run dir from job submit dir
    run_dir = self._job_submit_dir

    # Let's try to find an invocation record...
    my_invocation_found = False
    my_task_number = 0
    self._stdout_text = ""  # Initialize stdout
    stdout_text_list = []
    stdout_size = 0
    for my_record in kickstart_output:
        if not "invocation" in my_record:
            # Not this one... skip to the next
            continue

        # Ok, we have an invocation record, extract the information we
        # need. Note that this may overwrite information obtained from
        # the submit file (e.g. the site_name).

        # Increment task_number
        my_task_number = my_task_number + 1

        if not my_invocation_found:
            # Things we only need to do once
            if "resource" in my_record:
                self._site_name = my_record["resource"]
            if "user" in my_record:
                self._remote_user = my_record["user"]
            if "cwd" in my_record:
                self._remote_working_dir = my_record["cwd"]
            if "hostname" in my_record:
                self._host_id = my_record["hostname"]

            # We are done with this part
            my_invocation_found = True

        # PM-1109 encode signal information if it exists
        signal_message = " "
        if "signalled" in my_record:
            # construct our own error message
            attrs = my_record["signalled"]
            signal_message = "Job was "
            if "action" in attrs:
                signal_message += attrs["action"]
            if "signal" in attrs:
                signal_message += " with signal " + attrs["signal"]

        # PM-641 optimization: modified string concatenation to a list join
        if "stdout" in my_record:
            # PM-1152 we always attempt to store up to MAX_OUTPUT_LENGTH
            stdout = self.get_snippet_to_populate(my_record["stdout"], my_task_number, stdout_size, "stdout")
            if stdout is not None:
                try:
                    stdout_text_list.append(utils.quote("#@ %d stdout\n" % (my_task_number)))
                    stdout_text_list.append(utils.quote(stdout))
                    stdout_text_list.append(utils.quote("\n"))
                    stdout_size += len(stdout) + 20
                except KeyError:
                    logger.exception("Unable to parse stdout section from kickstart record for task %s from file %s " % (my_task_number, self.get_rotated_out_filename()))

        if "stderr" in my_record:
            # Note: we are populating task stderr from kickstart record to job stdout only
            stderr = self.get_snippet_to_populate(signal_message + my_record["stderr"], my_task_number, stdout_size, "stderr")
            if stderr is not None:
                try:
                    stdout_text_list.append(utils.quote("#@ %d stderr\n" % (my_task_number)))
                    stdout_text_list.append(utils.quote(stderr))
                    stdout_text_list.append(utils.quote("\n"))
                    stdout_size += len(stderr) + 20
                except KeyError:
                    logger.exception("Unable to parse stderr section from kickstart record for task %s from file %s " % (my_task_number, self.get_rotated_out_filename()))

    if len(stdout_text_list) > 0:
        self._stdout_text = "".join(stdout_text_list)

    # PM-641 optimization: encoding merged above
    # Now, we encode it!
    # if self._stdout_text != "":
    #     self._stdout_text = utils.quote(self._stdout_text)

    if not my_invocation_found:
        logger.debug("cannot find invocation record in output")

    # Look for clustered record...
    my_cluster_found = False
    for my_record in kickstart_output:
        if not "clustered" in my_record:
            # Not this one... skip to the next
            continue
        # Ok found it, fill in cluster parameters
        if "duration" in my_record:
            self._cluster_duration = my_record["duration"]
        if "start" in my_record:
            # Convert timestamp to EPOCH
            my_start = utils.epochdate(my_record["start"])
            if my_start is not None:
                self._cluster_start_time = my_start
        # No need to look further...
        my_cluster_found = True
        break

    if not my_cluster_found:
        logger.debug("cannot find cluster record in output")

    # Finally, read error file only
    # my_err_file = os.path.join(run_dir, self._error_file)
    basename = self._exec_job_id + ".err"
    my_err_file = os.path.join(run_dir, basename)

    if my_invocation_found:
        # in my job output there were some invocation records
        # assume then that they are rotated also
        my_err_file = my_err_file + ".%03d" % (self._job_output_counter)

    try:
        ERR = open(my_err_file, 'r')
        self._stderr_text = utils.quote(ERR.read())
    except IOError:
        self._stderr_text = None
        if not self.is_noop_job():
            logger.warning("unable to read error file: %s, continuing..." % (my_err_file))
    else:
        ERR.close()

    # Done populating Job class with information from the output file
    return my_invocation_found
def read_stdout_stderr_files(self, run_dir=None):
    """
    This function reads both stdout and stderr files and populates
    these fields in the Job class.
    """
    my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000
    if run_dir is None:
        # PM-1157 pick from the job submit directory associated with the job
        run_dir = self._job_submit_dir

    if self._output_file is None:
        # This is the case for SUBDAG jobs
        self._stdout_text = None
    else:
        # PM-1157 output file has absolute path from submit file
        # interferes with replay mode on another directory
        # basename = self._output_file
        basename = self._exec_job_id + ".out"
        if self._has_rotated_stdout_err_files:
            basename += ".%03d" % (self._job_output_counter)
        my_out_file = os.path.join(run_dir, basename)

        try:
            OUT = open(my_out_file, 'r')
            buffer = OUT.read()
            if len(buffer) > my_max_encoded_length:
                buffer = buffer[:my_max_encoded_length]
            self._stdout_text = utils.quote("#@ 1 stdout\n" + buffer)
        except IOError:
            self._stdout_text = None
            if not self.is_noop_job():
                logger.warning("unable to read output file: %s, continuing..." % (my_out_file))
        else:
            OUT.close()

    if self._error_file is None:
        # This is the case for SUBDAG jobs
        self._stderr_text = None
    else:
        # basename = self._error_file
        basename = self._exec_job_id + ".err"
        if self._has_rotated_stdout_err_files:
            basename += ".%03d" % (self._job_output_counter)
        my_err_file = os.path.join(run_dir, basename)

        try:
            ERR = open(my_err_file, 'r')
            buffer = ERR.read()
            if len(buffer) > my_max_encoded_length:
                buffer = buffer[:my_max_encoded_length]
            self._stderr_text = utils.quote(buffer)
        except IOError:
            self._stderr_text = None
            if not self.is_noop_job():
                logger.warning("unable to read error file: %s, continuing..." % (my_err_file))
        else:
            ERR.close()
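# All of the readers above build rotated-file names with a zero-padded
# ".%03d" suffix; a quick illustration with hypothetical values:
basename = "sleep_ID0000001.out"   # hypothetical job output basename
job_output_counter = 2             # incremented each time out/err are rotated
print(basename + ".%03d" % job_output_counter)   # -> sleep_ID0000001.out.002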
def testQuote(self):
    "Quoting should replace non-printing characters with percent-encoded escapes"
    self.assertEqual(utils.quote("hello\r\n\t"), "hello%0D%0A%09")
    for i in range(0, 0x20):
        self.assertEqual(utils.quote(chr(i)), "%%%02X" % i)
    for i in range(0x20, 0x7F):
        if not chr(i) in "'\"%":
            self.assertEqual(utils.quote(chr(i)), chr(i))
    for i in range(0x7F, 0xFF):
        self.assertEqual(utils.quote(chr(i)), "%%%02X" % i)
    self.assertEqual(utils.quote("%"), "%25")
    self.assertEqual(utils.quote("'"), "%27")
    self.assertEqual(utils.quote('"'), "%22")
    self.assertEqual(utils.quote("Hello\nWorld!\n"), "Hello%0AWorld!%0A")
    self.assertEqual(utils.quote("Zoë"), "Zo%C3%AB")
    self.assertEqual(utils.quote(u"Zoë"), "Zo%C3%AB")
    self.assertEqual(utils.quote(u"Zo\xeb"), "Zo%C3%AB")
    self.assertEqual(utils.quote("warning: unused variable ‘Narr’"),
                     "warning: unused variable %E2%80%98Narr%E2%80%99")
    self.assertEqual(utils.quote(u"warning: unused variable ‘Narr’"),
                     "warning: unused variable %E2%80%98Narr%E2%80%99")
    self.assertEqual(utils.quote(u"warning: unused variable \u2018Narr\u2019"),
                     "warning: unused variable %E2%80%98Narr%E2%80%99")
def testQuoteInvalidChars(self):
    "Invalid UTF-8 byte strings should not cause quote to fail"
    self.assertEqual(utils.quote("\x80"), "%80")                    # Invalid 1 Octet Sequence
    self.assertEqual(utils.quote("\xc3\x28"), "%C3(")               # Invalid 2 Octet Sequence
    self.assertEqual(utils.quote("\xa0\xa1"), "%A0%A1")             # Invalid Sequence Identifier
    self.assertEqual(utils.quote("\xe2\x82\xa1"), "%E2%82%A1")      # Valid 3 Octet Sequence
    self.assertEqual(utils.quote("\xe2\x28\xa1"), "%E2(%A1")        # Invalid 3 Octet Sequence (in 2nd Octet)
    self.assertEqual(utils.quote("\xe2\x82\x28"), "%E2%82(")        # Invalid 3 Octet Sequence (in 3rd Octet)
    self.assertEqual(utils.quote("\xf0\x90\x8c\xbc"), "%F0%90%8C%BC")   # Valid 4 Octet Sequence
    self.assertEqual(utils.quote("\xf0\x28\x8c\xbc"), "%F0(%8C%BC")     # Invalid 4 Octet Sequence (in 2nd Octet)
    self.assertEqual(utils.quote("\xf0\x90\x28\xbc"), "%F0%90(%BC")     # Invalid 4 Octet Sequence (in 3rd Octet)
    self.assertEqual(utils.quote("\xf0\x28\x8c\x28"), "%F0(%8C(")       # Invalid 4 Octet Sequence (in 4th Octet)
    self.assertEqual(utils.quote("\xf8\xa1\xa1\xa1\xa1"), "%F8%A1%A1%A1%A1")          # Valid 5 Octet Sequence (but not Unicode!)
    self.assertEqual(utils.quote("\xfc\xa1\xa1\xa1\xa1\xa1"), "%FC%A1%A1%A1%A1%A1")   # Valid 6 Octet Sequence (but not Unicode!)
def testQuoteUnquoteUnicode(self):
    "Unicode strings should be utf-8 encoded when passed through quote"
    self.assertEqual(utils.quote(u"Zo\xeb"), "Zo%C3%AB")
def extract_job_info(self, kickstart_output):
    """
    This function reads the output from the kickstart parser and
    extracts the job information for the Stampede schema. It first
    looks for an invocation record, and then for a clustered record.
    Returns None if an error occurs, True if an invocation record
    was found, and False if it wasn't.
    """
    # Check if we have anything
    if len(kickstart_output) == 0:
        return None

    # Kickstart was parsed
    self._kickstart_parsed = True

    # PM-1157 we construct run dir from job submit dir
    run_dir = self._job_submit_dir

    # Let's try to find an invocation record...
    my_invocation_found = False
    my_task_number = 0
    self._stdout_text = ""  # Initialize stdout
    stdout_text_list = []
    stdout_size = 0
    for my_record in kickstart_output:
        if not "invocation" in my_record:
            # Not this one... skip to the next
            continue

        # Ok, we have an invocation record, extract the information we
        # need. Note that this may overwrite information obtained from
        # the submit file (e.g. the site_name).

        # Increment task_number
        my_task_number = my_task_number + 1

        if not my_invocation_found:
            # Things we only need to do once
            if "resource" in my_record:
                self._site_name = my_record["resource"]
            if "user" in my_record:
                self._remote_user = my_record["user"]
            if "cwd" in my_record:
                self._remote_working_dir = my_record["cwd"]
            if "hostname" in my_record:
                self._host_id = my_record["hostname"]

            # We are done with this part
            my_invocation_found = True

        # PM-1109 encode signal information if it exists
        signal_message = " "
        if "signalled" in my_record:
            # construct our own error message
            attrs = my_record["signalled"]
            signal_message = "Job was "
            if "action" in attrs:
                signal_message += attrs["action"]
            if "signal" in attrs:
                signal_message += " with signal " + attrs["signal"]

        # PM-641 optimization: modified string concatenation to a list join
        if "stdout" in my_record:
            task_output = self.split_task_output(my_record["stdout"])
            self._add_additional_monitoring_events(task_output.events)
            # PM-1152 we always attempt to store up to MAX_OUTPUT_LENGTH
            stdout = self.get_snippet_to_populate(task_output.user_data, my_task_number, stdout_size, "stdout")
            if stdout is not None:
                try:
                    stdout_text_list.append(utils.quote("#@ %d stdout\n" % (my_task_number)))
                    stdout_text_list.append(utils.quote(stdout))
                    stdout_text_list.append(utils.quote("\n"))
                    stdout_size += len(stdout) + 20
                except KeyError:
                    logger.exception("Unable to parse stdout section from kickstart record for task %s from file %s " % (my_task_number, self.get_rotated_out_filename()))

        if "stderr" in my_record:
            task_error = self.split_task_output(my_record["stderr"])
            # add the events to those retrieved from the application stderr
            self._add_additional_monitoring_events(task_error.events)
            # Note: we are populating task stderr from kickstart record to job stdout only
            stderr = self.get_snippet_to_populate(signal_message + task_error.user_data, my_task_number, stdout_size, "stderr")
            if stderr is not None:
                try:
                    stdout_text_list.append(utils.quote("#@ %d stderr\n" % (my_task_number)))
                    stdout_text_list.append(utils.quote(stderr))
                    stdout_text_list.append(utils.quote("\n"))
                    stdout_size += len(stderr) + 20
                except KeyError:
                    logger.exception("Unable to parse stderr section from kickstart record for task %s from file %s " % (my_task_number, self.get_rotated_out_filename()))

    if len(stdout_text_list) > 0:
        self._stdout_text = "".join(stdout_text_list)

    # PM-641 optimization: encoding merged above
    # Now, we encode it!
    # if self._stdout_text != "":
    #     self._stdout_text = utils.quote(self._stdout_text)

    if not my_invocation_found:
        logger.debug("cannot find invocation record in output")

    # Look for clustered record...
    my_cluster_found = False
    for my_record in kickstart_output:
        if not "clustered" in my_record:
            # Not this one... skip to the next
            continue
        # Ok found it, fill in cluster parameters
        if "duration" in my_record:
            self._cluster_duration = my_record["duration"]
        if "start" in my_record:
            # Convert timestamp to EPOCH
            my_start = utils.epochdate(my_record["start"])
            if my_start is not None:
                self._cluster_start_time = my_start
        # No need to look further...
        my_cluster_found = True
        break

    if not my_cluster_found:
        logger.debug("cannot find cluster record in output")

    # Done populating Job class with information from the output file
    return my_invocation_found
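# The "#@ <task> stdout" / "#@ <task> stderr" markers frame each task's
# percent-encoded snippet inside _stdout_text. A small illustration of the
# framing, assuming utils is importable as in the surrounding code and that
# quote encodes "\n" as "%0A" (as the tests below assert):
from Pegasus.tools import utils   # assumed import path

parts = []
parts.append(utils.quote("#@ 1 stdout\n"))   # -> "#@ 1 stdout%0A"
parts.append(utils.quote("hello world\n"))   # -> "hello world%0A"
parts.append(utils.quote("\n"))              # -> "%0A"
stdout_text = "".join(parts)                 # "#@ 1 stdout%0Ahello world%0A%0A"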
def testQuote(self):
    "Quoting should replace non-printing characters with percent-encoded escapes"
    self.assertEqual(utils.quote("hello\r\n\t"), "hello%0D%0A%09")
    for i in range(0, 0x20):
        self.assertEqual(utils.quote(chr(i)), "%%%02X" % i)
    for i in range(0x20, 0x7F):
        if not chr(i) in "'\"%":
            self.assertEqual(utils.quote(chr(i)), chr(i))
    for i in range(0x7F, 0xFF):
        self.assertEqual(utils.quote(i.to_bytes(length=1, byteorder="big")), "%%%02X" % i)
    self.assertEqual(utils.quote("%"), "%25")
    self.assertEqual(utils.quote("'"), "%27")
    self.assertEqual(utils.quote('"'), "%22")
    self.assertEqual(utils.quote("Hello\nWorld!\n"), "Hello%0AWorld!%0A")
    self.assertEqual(utils.quote("Zoë"), "Zo%C3%AB")
    self.assertEqual(utils.quote("Zo\xeb"), "Zo%C3%AB")
    self.assertEqual(utils.quote("warning: unused variable ‘Narr’"),
                     "warning: unused variable %E2%80%98Narr%E2%80%99")
    self.assertEqual(utils.quote("warning: unused variable \u2018Narr\u2019"),
                     "warning: unused variable %E2%80%98Narr%E2%80%99")
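# A minimal sketch of the encoding behaviour these assertions pin down
# (an assumption about the shape of utils.quote, which may differ in details
# such as error handling): printable ASCII except %, ' and " passes through,
# every other byte becomes a %XX escape, and str input is UTF-8 encoded first.
_PASSTHROUGH = set(range(0x20, 0x7F)) - {ord("%"), ord("'"), ord('"')}

def quote_sketch(data):
    if isinstance(data, str):
        data = data.encode("utf-8")   # unicode text is UTF-8 encoded first
    return "".join(chr(b) if b in _PASSTHROUGH else "%%%02X" % b for b in data)

assert quote_sketch("hello\r\n\t") == "hello%0D%0A%09"
assert quote_sketch("Zoë") == "Zo%C3%AB"
assert quote_sketch(b"\xc3\x28") == "%C3("   # invalid UTF-8 is escaped byte-wise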
def testQuoteInvalidChars(self):
    "Invalid UTF-8 byte strings should not cause quote to fail"
    self.assertEqual(utils.quote(b"\x80"), "%80")                   # Invalid 1 Octet Sequence
    self.assertEqual(utils.quote(b"\xc3\x28"), "%C3(")              # Invalid 2 Octet Sequence
    self.assertEqual(utils.quote(b"\xa0\xa1"), "%A0%A1")            # Invalid Sequence Identifier
    self.assertEqual(utils.quote(b"\xe2\x82\xa1"), "%E2%82%A1")     # Valid 3 Octet Sequence
    self.assertEqual(utils.quote(b"\xe2\x28\xa1"), "%E2(%A1")       # Invalid 3 Octet Sequence (in 2nd Octet)
    self.assertEqual(utils.quote(b"\xe2\x82\x28"), "%E2%82(")       # Invalid 3 Octet Sequence (in 3rd Octet)
    self.assertEqual(utils.quote(b"\xf0\x90\x8c\xbc"), "%F0%90%8C%BC")   # Valid 4 Octet Sequence
    self.assertEqual(utils.quote(b"\xf0\x28\x8c\xbc"), "%F0(%8C%BC")     # Invalid 4 Octet Sequence (in 2nd Octet)
    self.assertEqual(utils.quote(b"\xf0\x90\x28\xbc"), "%F0%90(%BC")     # Invalid 4 Octet Sequence (in 3rd Octet)
    self.assertEqual(utils.quote(b"\xf0\x28\x8c\x28"), "%F0(%8C(")       # Invalid 4 Octet Sequence (in 4th Octet)
    self.assertEqual(utils.quote(b"\xf8\xa1\xa1\xa1\xa1"), "%F8%A1%A1%A1%A1")          # Valid 5 Octet Sequence (but not Unicode!)
    self.assertEqual(utils.quote(b"\xfc\xa1\xa1\xa1\xa1\xa1"), "%FC%A1%A1%A1%A1%A1")   # Valid 6 Octet Sequence (but not Unicode!)
def testQuoteUnquoteUnicode(self):
    "Unicode strings should be utf-8 encoded when passed through quote"
    self.assertEqual(utils.quote("Zo\xeb"), "Zo%C3%AB")
def extract_job_info(self, run_dir, kickstart_output):
    """
    This function reads the output from the kickstart parser and
    extracts the job information for the Stampede schema. It first
    looks for an invocation record, and then for a clustered record.
    Returns None if an error occurs, True if an invocation record
    was found, and False if it wasn't.
    """
    # Check if we have anything
    if len(kickstart_output) == 0:
        return None

    # Kickstart was parsed
    self._kickstart_parsed = True

    # Let's try to find an invocation record...
    my_invocation_found = False
    my_task_number = 0
    self._stdout_text = ""  # Initialize stdout
    stdout_text_list = []
    stdout_size = 0
    for my_record in kickstart_output:
        if not "invocation" in my_record:
            # Not this one... skip to the next
            continue

        # Ok, we have an invocation record, extract the information we
        # need. Note that this may overwrite information obtained from
        # the submit file (e.g. the site_name).

        # Increment task_number
        my_task_number = my_task_number + 1

        if not my_invocation_found:
            # Things we only need to do once
            if "resource" in my_record:
                self._site_name = my_record["resource"]
            if "user" in my_record:
                self._remote_user = my_record["user"]
            if "cwd" in my_record:
                self._remote_working_dir = my_record["cwd"]
            if "hostname" in my_record:
                self._host_id = my_record["hostname"]

            # We are done with this part
            my_invocation_found = True

        # PM-641 optimization: modified string concatenation to a list join
        if "stdout" in my_record:
            if len(my_record["stdout"]) <= MAX_OUTPUT_LENGTH - stdout_size:
                stdout_text_list.append(utils.quote("#@ %d stdout\n" % (my_task_number)))
                stdout_text_list.append(utils.quote(my_record["stdout"]))
                stdout_text_list.append(utils.quote("\n"))
                stdout_size += len(my_record["stdout"]) + 20

        if "stderr" in my_record:
            if len(my_record["stderr"]) <= MAX_OUTPUT_LENGTH - stdout_size:
                stdout_text_list.append(utils.quote("#@ %d stderr\n" % (my_task_number)))
                stdout_text_list.append(utils.quote(my_record["stderr"]))
                stdout_text_list.append(utils.quote("\n"))
                stdout_size += len(my_record["stderr"]) + 20

    if len(stdout_text_list) > 0:
        self._stdout_text = "".join(stdout_text_list)

    # PM-641 optimization: encoding merged above
    # Now, we encode it!
    # if self._stdout_text != "":
    #     self._stdout_text = utils.quote(self._stdout_text)

    if not my_invocation_found:
        logger.debug("cannot find invocation record in output")

    # Look for clustered record...
    my_cluster_found = False
    for my_record in kickstart_output:
        if not "clustered" in my_record:
            # Not this one... skip to the next
            continue
        # Ok found it, fill in cluster parameters
        if "duration" in my_record:
            self._cluster_duration = my_record["duration"]
        if "start" in my_record:
            # Convert timestamp to EPOCH
            my_start = utils.epochdate(my_record["start"])
            if my_start is not None:
                self._cluster_start_time = my_start
        # No need to look further...
        my_cluster_found = True
        break

    if not my_cluster_found:
        logger.debug("cannot find cluster record in output")

    # Finally, read error file only
    my_err_file = os.path.join(run_dir, self._error_file)

    if my_invocation_found:
        # in my job output there were some invocation records
        # assume then that they are rotated also
        my_err_file = my_err_file + ".%03d" % (self._job_output_counter)

    try:
        ERR = open(my_err_file, 'r')
        self._stderr_text = utils.quote(ERR.read())
    except IOError:
        self._stderr_text = None
        logger.warning("unable to read error file: %s, continuing..." % (my_err_file))
    else:
        ERR.close()

    # Done populating Job class with information from the output file
    return my_invocation_found
def extract_job_info(self, kickstart_output):
    """
    This function reads the output from the kickstart parser and
    extracts the job information for the Stampede schema. It first
    looks for an invocation record, and then for a clustered record.
    Returns None if an error occurs, True if an invocation record
    was found, and False if it wasn't.
    """
    # Check if we have anything
    if len(kickstart_output) == 0:
        return None

    # Kickstart was parsed
    self._kickstart_parsed = True

    # PM-1157 we construct run dir from job submit dir
    run_dir = self._job_submit_dir

    # Let's try to find an invocation record...
    my_invocation_found = False
    my_task_number = 0
    self._stdout_text = ""  # Initialize stdout
    stdout_text_list = []
    stdout_size = 0
    for my_record in kickstart_output:
        if "multipart" in my_record:
            # PM-1390 convert to integrity metrics
            logger.debug("Multipart record %s", my_record)
            self._add_multipart_events([my_record])
        elif not "invocation" in my_record:
            # Not this one... skip to the next
            logger.trace("Skipping %s", my_record)
            continue

        # Ok, we have an invocation record, extract the information we
        # need. Note that this may overwrite information obtained from
        # the submit file (e.g. the site_name).

        # Increment task_number
        my_task_number = my_task_number + 1

        if not my_invocation_found:
            # Things we only need to do once
            if "resource" in my_record:
                self._site_name = my_record["resource"]
            if "user" in my_record:
                self._remote_user = my_record["user"]
            if "cwd" in my_record:
                self._remote_working_dir = my_record["cwd"]
            if "hostname" in my_record:
                self._host_id = my_record["hostname"]

            # We are done with this part
            my_invocation_found = True

        # PM-1109 encode signal information if it exists
        signal_message = " "
        if "signalled" in my_record:
            # construct our own error message
            attrs = my_record["signalled"]
            signal_message = "Job was "
            if "action" in attrs:
                signal_message += attrs["action"]
            if "signal" in attrs:
                signal_message += " with signal " + attrs["signal"]

        # PM-641 optimization: modified string concatenation to a list join
        if "stdout" in my_record:
            task_output = self.split_task_output(my_record["stdout"])
            self._add_additional_monitoring_events(task_output.events)
            # PM-1152 we always attempt to store up to MAX_OUTPUT_LENGTH
            stdout = self.get_snippet_to_populate(task_output.user_data, my_task_number, stdout_size, "stdout")
            if stdout is not None:
                try:
                    stdout_text_list.append(utils.quote("#@ %d stdout\n" % (my_task_number)))
                    stdout_text_list.append(utils.quote(stdout))
                    stdout_text_list.append(utils.quote("\n"))
                    stdout_size += len(stdout) + 20
                except KeyError:
                    logger.exception("Unable to parse stdout section from kickstart record for task %s from file %s " % (my_task_number, self.get_rotated_out_filename()))

        if "stderr" in my_record:
            task_error = self.split_task_output(my_record["stderr"])
            # add the events to those retrieved from the application stderr
            self._add_additional_monitoring_events(task_error.events)
            # Note: we are populating task stderr from kickstart record to job stdout only
            stderr = self.get_snippet_to_populate(
                signal_message + task_error.user_data,
                my_task_number,
                stdout_size,
                "stderr",
            )
            if stderr is not None:
                try:
                    stdout_text_list.append(utils.quote("#@ %d stderr\n" % (my_task_number)))
                    stdout_text_list.append(utils.quote(stderr))
                    stdout_text_list.append(utils.quote("\n"))
                    stdout_size += len(stderr) + 20
                except KeyError:
                    logger.exception("Unable to parse stderr section from kickstart record for task %s from file %s " % (my_task_number, self.get_rotated_out_filename()))

        # PM-1398 pass cpu info
        if "cpu" in my_record:
            self._cpu_attribs = my_record["cpu"]

    if len(stdout_text_list) > 0:
        self._stdout_text = "".join(stdout_text_list)

    # PM-641 optimization: encoding merged above
    # Now, we encode it!
    # if self._stdout_text != "":
    #     self._stdout_text = utils.quote(self._stdout_text)

    if not my_invocation_found:
        logger.debug("cannot find invocation record in output")

    # Look for clustered record...
    my_cluster_found = False
    for my_record in kickstart_output:
        if not "clustered" in my_record:
            # Not this one... skip to the next
            continue
        # Ok found it, fill in cluster parameters
        if "duration" in my_record:
            self._cluster_duration = my_record["duration"]
        if "start" in my_record:
            # Convert timestamp to EPOCH
            my_start = utils.epochdate(my_record["start"])
            if my_start is not None:
                self._cluster_start_time = my_start
        # No need to look further...
        my_cluster_found = True
        break

    if not my_cluster_found:
        logger.debug("cannot find cluster record in output")

    # Done populating Job class with information from the output file
    return my_invocation_found
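# utils.epochdate converts the kickstart "start" timestamp to seconds since
# the epoch; a minimal sketch under the assumption that kickstart emits
# ISO-8601 timestamps with a UTC offset (the real utils.epochdate may accept
# other formats and differ in its error handling):
from datetime import datetime

def epochdate_sketch(ts):
    try:
        return datetime.fromisoformat(ts).timestamp()
    except ValueError:
        return None   # callers above check for None before storing

epochdate_sketch("2023-01-15T10:30:00-06:00")   # -> 1673800200.0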
def testQuote(self):
    "Quoting should escape non-printing characters"
    self.assertEquals("hello%0D%0A%09", utils.quote("hello\r\n\t"))
def extract_job_info(self, run_dir, kickstart_output):
    """
    This function reads the output from the kickstart parser and
    extracts the job information for the Stampede schema. It first
    looks for an invocation record, and then for a clustered record.
    Returns None if an error occurs, True if an invocation record
    was found, and False if it wasn't.
    """
    # Check if we have anything
    if len(kickstart_output) == 0:
        return None

    # Kickstart was parsed
    self._kickstart_parsed = True

    # Let's try to find an invocation record...
    my_invocation_found = False
    my_task_number = 0
    self._stdout_text = ""  # Initialize stdout
    stdout_text_list = []
    stdout_size = 0
    for my_record in kickstart_output:
        if not "invocation" in my_record:
            # Not this one... skip to the next
            continue

        # Ok, we have an invocation record, extract the information we
        # need. Note that this may overwrite information obtained from
        # the submit file (e.g. the site_name).

        # Increment task_number
        my_task_number = my_task_number + 1

        if not my_invocation_found:
            # Things we only need to do once
            if "resource" in my_record:
                self._site_name = my_record["resource"]
            if "user" in my_record:
                self._remote_user = my_record["user"]
            if "cwd" in my_record:
                self._remote_working_dir = my_record["cwd"]
            if "hostname" in my_record:
                self._host_id = my_record["hostname"]

            # We are done with this part
            my_invocation_found = True

        # PM-641 optimization: modified string concatenation to a list join
        if "stdout" in my_record:
            if len(my_record["stdout"]) <= MAX_OUTPUT_LENGTH - stdout_size:
                try:
                    stdout_text_list.append(utils.quote("#@ %d stdout\n" % (my_task_number)))
                    stdout_text_list.append(utils.quote(my_record["stdout"]))
                    stdout_text_list.append(utils.quote("\n"))
                    stdout_size += len(my_record["stdout"]) + 20
                except KeyError:
                    logger.exception("Unable to parse stdout section from kickstart record for task %s from file %s " % (my_task_number, self.get_rotated_out_filename()))

        if "stderr" in my_record:
            if len(my_record["stderr"]) <= MAX_OUTPUT_LENGTH - stdout_size:
                try:
                    stdout_text_list.append(utils.quote("#@ %d stderr\n" % (my_task_number)))
                    stdout_text_list.append(utils.quote(my_record["stderr"]))
                    stdout_text_list.append(utils.quote("\n"))
                    stdout_size += len(my_record["stderr"]) + 20
                except KeyError:
                    logger.exception("Unable to parse stderr section from kickstart record for task %s from file %s " % (my_task_number, self.get_rotated_out_filename()))

    if len(stdout_text_list) > 0:
        self._stdout_text = "".join(stdout_text_list)

    # PM-641 optimization: encoding merged above
    # Now, we encode it!
    # if self._stdout_text != "":
    #     self._stdout_text = utils.quote(self._stdout_text)

    if not my_invocation_found:
        logger.debug("cannot find invocation record in output")

    # Look for clustered record...
    my_cluster_found = False
    for my_record in kickstart_output:
        if not "clustered" in my_record:
            # Not this one... skip to the next
            continue
        # Ok found it, fill in cluster parameters
        if "duration" in my_record:
            self._cluster_duration = my_record["duration"]
        if "start" in my_record:
            # Convert timestamp to EPOCH
            my_start = utils.epochdate(my_record["start"])
            if my_start is not None:
                self._cluster_start_time = my_start
        # No need to look further...
        my_cluster_found = True
        break

    if not my_cluster_found:
        logger.debug("cannot find cluster record in output")

    # Finally, read error file only
    my_err_file = os.path.join(run_dir, self._error_file)

    if my_invocation_found:
        # in my job output there were some invocation records
        # assume then that they are rotated also
        my_err_file = my_err_file + ".%03d" % (self._job_output_counter)

    try:
        ERR = open(my_err_file, 'r')
        self._stderr_text = utils.quote(ERR.read())
    except IOError:
        self._stderr_text = None
        logger.warning("unable to read error file: %s, continuing..." % (my_err_file))
    else:
        ERR.close()

    # Done populating Job class with information from the output file
    return my_invocation_found
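# Every reader above truncates raw text to MAX_OUTPUT_LENGTH - 2000 before
# quoting, presumably leaving headroom for percent-encoding expansion (each
# escaped byte grows to three characters). A hypothetical illustration:
MAX_OUTPUT_LENGTH = 65536                      # hypothetical value
my_max_encoded_length = MAX_OUTPUT_LENGTH - 2000

buffer = "x" * 100000                          # oversized job output
if len(buffer) > my_max_encoded_length:
    buffer = buffer[:my_max_encoded_length]    # truncate, then quote
len(buffer)                                    # -> 63536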