Beispiel #1
0
    def test_dump_error_file_overwrite_existing(self):
        """Dump a source error file onto a destination that already has content.

        The destination is pre-populated with a different error ("foo") than
        the source ("bar"); after ``dump_error_file`` the two files must
        compare equal.
        """
        src_path = os.path.join(self.test_dir, "src_error.json")
        dst_path = os.path.join(self.test_dir, "dst_error.json")
        # Seed the destination first, then the source, each with its own error.
        _write_error(RuntimeError("foo"), dst_path)
        _write_error(RuntimeError("bar"), src_path)

        env_override = {"TORCHELASTIC_ERROR_FILE": dst_path}
        with patch.dict(os.environ, env_override):
            handler = ErrorHandler()
            handler.dump_error_file(src_path)
            files_match = filecmp.cmp(src_path, dst_path)
            self.assertTrue(files_match)
Beispiel #2
0
 def failure_with_error_file(self, exception):
     """Record ``exception`` and return a ``ProcessFailure`` for it.

     The exception is recorded through ``ErrorHandler`` while
     ``TORCHELASTIC_ERROR_FILE`` points at ``self.test_error_file``; the
     returned failure references that same file and uses fixed values for
     rank, pid and exit code.
     """
     env_override = {"TORCHELASTIC_ERROR_FILE": self.test_error_file}
     with mock.patch.dict(os.environ, env_override):
         handler = ErrorHandler()
         handler.record_exception(exception)

     failure = ProcessFailure(
         local_rank=0,
         pid=997,
         exitcode=1,
         error_file=self.test_error_file,
     )
     return failure
Beispiel #3
0
def raise_child_failure_error_fn(name, child_error_file=""):
    """Always raise ``ChildFailedError`` for a single failed process (rank 0).

    When ``child_error_file`` is non-empty, a ``SentinelError("foobar")`` is
    first recorded through ``ErrorHandler`` with ``TORCHELASTIC_ERROR_FILE``
    pointing at that path. The ``ProcessFailure`` attached to the raised
    error references ``child_error_file`` either way.
    """
    if child_error_file:
        env_override = {"TORCHELASTIC_ERROR_FILE": child_error_file}
        with mock.patch.dict(os.environ, env_override):
            handler = ErrorHandler()
            handler.record_exception(SentinelError("foobar"))

    failure = ProcessFailure(
        local_rank=0,
        pid=997,
        exitcode=1,
        error_file=child_error_file,
    )
    failures_by_rank = {0: failure}
    raise ChildFailedError(name, failures_by_rank)
Beispiel #4
0
    def test_copy_error_file(self):
        """Exercise ``copy_error_file`` with and without an error file in the env.

        First case: ``TORCHELASTIC_ERROR_FILE`` points at
        ``self.test_error_file`` and the copied file must compare equal to
        the source. Second case: the variable is absent and the call only
        has to complete without raising.
        """
        src_error_file = os.path.join(self.test_dir, "src_error.json")
        _write_error(RuntimeError("foobar"), src_error_file)

        with patch.dict(os.environ,
                        {"TORCHELASTIC_ERROR_FILE": self.test_error_file}):
            eh = ErrorHandler()
            eh.copy_error_file(src_error_file)
            self.assertTrue(filecmp.cmp(src_error_file, self.test_error_file))

        with patch.dict(os.environ, {}):
            # Patching with an empty mapping leaves any inherited
            # TORCHELASTIC_ERROR_FILE in place, so drop it explicitly;
            # patch.dict restores the original environment on exit.
            # NOTE(review): presumably this branch is meant to cover the
            # "no error file configured" case -- confirm.
            os.environ.pop("TORCHELASTIC_ERROR_FILE", None)
            eh = ErrorHandler()
            eh.copy_error_file(src_error_file)
Beispiel #5
0
    def test_dump_error_file(self):
        """Exercise ``dump_error_file`` with and without an error file in the env.

        An exception is first recorded while ``TORCHELASTIC_ERROR_FILE``
        points at a source path. Dumping that source while the variable
        points at ``self.test_error_file`` must leave the two files comparing
        equal. Finally the dump is repeated with the variable absent, which
        only has to complete without raising.
        """
        src_error_file = os.path.join(self.test_dir, "src_error.json")
        eh = ErrorHandler()
        with patch.dict(os.environ,
                        {"TORCHELASTIC_ERROR_FILE": src_error_file}):
            eh.record_exception(RuntimeError("foobar"))

        with patch.dict(os.environ,
                        {"TORCHELASTIC_ERROR_FILE": self.test_error_file}):
            eh.dump_error_file(src_error_file)
            self.assertTrue(filecmp.cmp(src_error_file, self.test_error_file))

        with patch.dict(os.environ, {}):
            # Patching with an empty mapping leaves any inherited
            # TORCHELASTIC_ERROR_FILE in place, so drop it explicitly;
            # patch.dict restores the original environment on exit.
            # NOTE(review): presumably this branch is meant to cover the
            # "no error file configured" case -- confirm.
            os.environ.pop("TORCHELASTIC_ERROR_FILE", None)
            eh.dump_error_file(src_error_file)
Beispiel #6
0
    def test_record_exception(self):
        """Record an exception and check the fields of the resulting JSON file.

        With ``TORCHELASTIC_ERROR_FILE`` pointing at ``self.test_error_file``,
        an exception raised by ``raise_exception_fn`` is recorded and the file
        is then loaded as JSON. The message, call stack and timestamp entries
        must all be non-None.
        """
        env_override = {"TORCHELASTIC_ERROR_FILE": self.test_error_file}
        with patch.dict(os.environ, env_override):
            handler = ErrorHandler()
            handler.initialize()

            try:
                raise_exception_fn()
            except Exception as exc:
                handler.record_exception(exc)

            with open(self.test_error_file, "r") as error_fp:
                err = json.load(error_fp)

            # Example of the error file content:
            # {
            #   "message": {
            #     "message": "RuntimeError: foobar",
            #     "extraInfo": {
            #       "py_callstack": "Traceback (most recent call last):\n  <... OMITTED ...>",
            #       "timestamp": "1605774851"
            #     }
            #   }
            # }
            payload = err["message"]
            self.assertIsNotNone(payload["message"])
            self.assertIsNotNone(payload["extraInfo"]["py_callstack"])
            self.assertIsNotNone(payload["extraInfo"]["timestamp"])
Beispiel #7
0
 def test_record_exception_no_error_file(self):
     """Check that recording an exception works with no error file in the env.

     ``TORCHELASTIC_ERROR_FILE`` is removed for the duration of the test so
     the scenario holds even when the surrounding environment defines it.
     The test passes as long as ``record_exception`` does not raise.
     """
     # make sure record does not fail when no error file is specified in env vars
     with patch.dict(os.environ, {}):
         # Patching with an empty mapping does not unset inherited variables,
         # so remove the key explicitly; patch.dict restores it on exit.
         os.environ.pop("TORCHELASTIC_ERROR_FILE", None)
         eh = ErrorHandler()
         eh.initialize()
         try:
             raise_exception_fn()
         except Exception as e:
             eh.record_exception(e)
Beispiel #8
0
def get_error_handler():
    """Create and return a new ``ErrorHandler`` instance."""
    handler = ErrorHandler()
    return handler
Beispiel #9
0
 def test_initialize_error(self, fh_enable_mock):
     """Check that ``initialize`` handles errors gracefully.

     ``fh_enable_mock`` is presumably injected by a patch decorator that is
     not visible here (and configured there to fail) -- TODO confirm. The
     test requires ``initialize`` to return normally and the mock to have
     been called exactly once.
     """
     handler = ErrorHandler()
     handler.initialize()
     fh_enable_mock.assert_called_once()
Beispiel #10
0
 def test_initialize(self, fh_enable_mock):
     """Check that ``initialize`` calls the patched enable hook exactly once.

     ``fh_enable_mock`` is presumably injected by a patch decorator that is
     not visible here -- TODO confirm.
     """
     handler = ErrorHandler()
     handler.initialize()
     fh_enable_mock.assert_called_once()