Esempio n. 1
0
  def trainThreadFunc():
    """
    Thread entry point: validate the target mode, initialise the engine and run the training.

    ``KeyboardInterrupt`` is swallowed silently. Every other exception
    (``BaseException``, so even ``SystemExit``) is reported through
    ``sys.excepthook`` and afterwards the main thread is interrupted.
    """
    try:
      assert TargetMode
      if TargetMode == "target-alignment":
        pass  # Ok.
      elif TargetMode == "criterion-by-sprint":
        # TODO ...
        raise NotImplementedError
      else:
        raise Exception("target-mode not supported: %s" % TargetMode)

      engine.init_train_from_config(config, train_data=sprintDataset)

      # If some epoch is explicitly specified, it checks whether it matches.
      if epoch is not None:
        assert epoch == engine.start_epoch

      # Do the actual training.
      engine.train()

    except KeyboardInterrupt:  # This happens at forced exit.
      pass

    except BaseException:  # Catch all, even SystemExit. We must stop the main thread then.
      try:
        # Parenthesised single-argument form: valid both as a Python 2 print
        # statement and as a Python 3 print() call.
        print("CRNN train failed")
        sys.excepthook(*sys.exc_info())
      finally:
        # Exceptions are fatal. Stop now, even if the reporting above failed.
        interrupt_main()
Esempio n. 2
0
 def run(self):
     """
     Call ``self.run_inner()`` and take care of reporting whatever escapes from it.

     This wrapper exists for better exception printing, because
     Thread.__bootstrap_inner() ignores sys.excepthook.
     """
     try:
         self.run_inner()
     except ProcConnectionDied:
         # Normally run_inner() should have caught that already, so this is somewhat unexpected.
         # Apart from logging we just pass on: self.finalized == False indicates the problem.
         already_exited = getattr(sys, "exited", False)
         if not already_exited:
             crash_text = "%s. Some device proc crashed unexpectedly." % self
             print(crash_text, file=log.v4)
     except Exception:
         # All standard exceptions end up here. They are not device errors; they should
         # have been handled in the code, and self.finalized stays False.
         # KeyboardInterrupt is deliberately not caught: the main thread sends it
         # when it is exiting. It never comes from the user because SIGINT always
         # triggers KeyboardInterrupt in the main thread only.
         try:
             failure_text = "%s failed" % self.name
             print(failure_text, file=log.v1)
             if log.v[4]:
                 exc_type, exc_value, exc_tb = sys.exc_info()
                 sys.excepthook(exc_type, exc_value, exc_tb)
                 print("")
         finally:
             # Exceptions are fatal. Anything recoverable belongs into run_inner().
             interrupt_main()
Esempio n. 3
0
    def trainThreadFunc():
        """
        Body of the training thread.

        Checks the target mode, initialises the engine from the config with the
        Sprint dataset as train data, verifies an explicitly given epoch and then
        runs the training. ``KeyboardInterrupt`` is ignored; any other exception
        is reported and leads to an interrupt of the main thread.
        """
        try:
            assert TargetMode
            if TargetMode != "target-alignment":
                # Everything except "target-alignment" is rejected.
                if TargetMode == "criterion-by-sprint":
                    # TODO ...
                    raise NotImplementedError
                raise Exception("target-mode not supported: %s" % TargetMode)

            engine.init_train_from_config(config, train_data=sprintDataset)

            # An explicitly specified epoch has to match the epoch the engine starts with.
            assert epoch is None or epoch == engine.start_epoch

            # The actual training.
            engine.train()

        except KeyboardInterrupt:
            # This happens at forced exit.
            return

        except BaseException:
            # Catch all, even SystemExit. We must stop the main thread then.
            try:
                print("CRNN train failed")
                failure_info = sys.exc_info()
                sys.excepthook(*failure_info)
            finally:
                # Exceptions are fatal. Stop now.
                interrupt_main()
Esempio n. 4
0
 def run(self):
   """
   Wrap ``self.run_inner()`` for better exception printing.

   Thread.__bootstrap_inner() ignores sys.excepthook, so failures are reported here.
   ``ProcConnectionDied`` is only logged; every other ``Exception`` is logged,
   optionally passed to ``sys.excepthook`` (if ``log.v[4]`` is set), and then the
   main thread is interrupted.

   Note: uses the print() function with ``file=``, which requires Python 3
   or ``from __future__ import print_function``.
   """
   try:
     self.run_inner()
   except ProcConnectionDied:
     if not getattr(sys, "exited", False):
       # Normally we should have caught that in run_inner(), so somewhat unexpected.
       print("%s. Some device proc crashed unexpectedly." % self, file=log.v4)
     # Just pass on. We have self.finalized == False which indicates the problem.
   except Exception:
     # Catch all standard exceptions.
     # These are not device errors. We should have caught them in the code
     # and we would leave self.finalized == False.
     # Don't catch KeyboardInterrupt here because that will get sent by the main thread
     # when it is exiting. It's never by the user because SIGINT will always
     # trigger KeyboardInterrupt in the main thread only.
     try:
       print("%s failed" % self.name, file=log.v1)
       if log.v[4]:
         sys.excepthook(*sys.exc_info())
         print("")
     finally:
       # Exceptions are fatal. If we can recover, we should handle it in run_inner().
       interrupt_main()
Esempio n. 5
0
    def _thread_main(self):
        """
        Thread entry point: load the "data" and "classes" files into memory.

        First counts the lines of the "data" file and stores the count in
        ``self._data_len``. Then reads both files chunk-wise, converts every line
        with ``self._data_str_to_numpy`` (using the vocab of the respective key)
        and appends the results to ``self._data[key]``. At the end all data files
        are closed and their entries in ``self._data_files`` are set to None.

        Any ``Exception`` is reported through ``sys.excepthook``; afterwards the
        main thread is interrupted.
        """
        from Util import interrupt_main
        try:
            import better_exchook
            better_exchook.install()

            # First iterate once over the data to get the data len as fast as possible.
            data_len = 0
            while True:
                ls = self._data_files["data"].readlines(10**4)
                data_len += len(ls)
                if not ls:
                    break
            with self._lock:
                self._data_len = data_len
            self._data_files["data"].seek(
                0, os.SEEK_SET)  # we will read it again below

            # Now, read and use the vocab for a compact representation in memory.
            keys_to_read = ["data", "classes"]
            while True:
                # Iterate over a copy because finished keys are removed inside the loop.
                for k in list(keys_to_read):
                    data_strs = self._data_files[k].readlines(10**6)
                    if not data_strs:
                        # This file is exhausted; it must have as many lines as counted above.
                        assert len(self._data[k]) == self._data_len
                        keys_to_read.remove(k)
                        continue
                    assert len(
                        self._data[k]) + len(data_strs) <= self._data_len
                    vocab = self._vocabs[k]
                    data = [
                        self._data_str_to_numpy(
                            vocab,
                            s.decode("utf8").strip() + self._add_postfix[k])
                        for s in data_strs
                    ]
                    with self._lock:
                        self._data[k].extend(data)
                if not keys_to_read:
                    break
            for k, f in list(self._data_files.items()):
                f.close()
                self._data_files[k] = None

        except Exception:
            try:
                sys.excepthook(*sys.exc_info())
            finally:
                # Exceptions are fatal. The main thread must be stopped even if
                # reporting the exception itself fails.
                interrupt_main()
Esempio n. 6
0
    def reader_thread_proc(self, child_pid, epoch):
        """
        Reader thread body: fetch items via ``self._read_next_raw()`` and add them as new data.

        :param child_pid: not referenced in this function body (``self.child_pid`` is used instead)
        :param epoch: epoch this reader belongs to; reading stops once ``self.crnnEpoch`` differs
        """
        try:
            self.add_data_thread_id = thread.get_ident()

            self.initSprintEpoch(epoch)
            saw_exit = False

            while not self.python_exit and self.child_pid:
                try:
                    kind, payload = self._read_next_raw()
                except (IOError, EOFError):
                    with self.lock:
                        # Having passed on to a new epoch is a valid reason that the child
                        # has been killed; the same holds for a shutdown or a gone child.
                        stop_is_expected = (
                            epoch != self.crnnEpoch
                            or self.python_exit
                            or not self.child_pid)
                    if stop_is_expected:
                        break
                    raise

                with self.lock:
                    if epoch != self.crnnEpoch or self.python_exit or not self.child_pid:
                        break

                    if kind == "exit":
                        saw_exit = True
                        break
                    elif kind == "data":
                        seg_name, feats, targs = payload
                        feats_copy = numpy_copy_and_set_unused(feats)
                        targs_copy = numpy_copy_and_set_unused(targs)
                        self.addNewData(feats_copy, targs_copy, segmentName=seg_name)
                    else:
                        assert False, "not handled: (%r, %r)" % (kind, payload)

            if self.seq_list_file:
                # Best-effort removal; a failure is only logged.
                try:
                    os.remove(self.seq_list_file)
                except Exception as remove_error:
                    print(
                        "ExternSprintDataset: error when removing %r: %r" % (self.seq_list_file, remove_error),
                        file=log.v5)
                finally:
                    self.seq_list_file = None

            if not self.python_exit and self.child_pid:
                with self.lock:
                    self.finishSprintEpoch()
                    if saw_exit:
                        self._num_seqs = self.next_seq_to_be_added
            print("ExternSprintDataset finished reading epoch %i" % epoch, file=log.v5)

        except Exception:
            if self.python_exit:
                return
            # All standard exceptions are handled here.
            # KeyboardInterrupt is deliberately not caught: the main thread sends it
            # when it is exiting. It never comes from the user because SIGINT always
            # triggers KeyboardInterrupt in the main thread only.
            try:
                print("ExternSprintDataset reader failed", file=log.v1)
                sys.excepthook(*sys.exc_info())
                print("")
            finally:
                # Exceptions are fatal. If we can recover, we should handle it in run_inner().
                interrupt_main()
Esempio n. 7
0
  def _reader_thread_proc(self, child_pid, epoch):
    """
    Reader thread body: fetch items via ``self._read_next_raw()`` and add them as new data
    until an "exit" item arrives, the epoch changes, or the dataset/child is shut down.

    On failure (unless ``self.python_exit`` is set) the epoch is finished with
    ``seen_all=False``, the exception is reported, and the main thread is interrupted.

    :param int child_pid: only used for the final log message
    :param int epoch: epoch this reader belongs to; reading stops once ``self.crnnEpoch`` differs
    """
    try:
      self.add_data_thread_id = thread.get_ident()

      self.init_sprint_epoch(epoch)
      have_seen_the_whole = False

      seq_count = 0
      while not self.python_exit and self.child_pid:
        try:
          data_type, args = self._read_next_raw()
        except (IOError, EOFError):
          with self.lock:
            if epoch != self.crnnEpoch:
              # We have passed on to a new epoch. This is a valid reason that the child has been killed.
              break
            if self.python_exit or not self.child_pid:
              break
          raise

        with self.lock:
          if epoch != self.crnnEpoch:
            break
          if self.python_exit or not self.child_pid:
            break

          if data_type == b"data":
            seq_count += 1
            segment_name, features, targets = args
            # Names and dict keys arrive as bytes and are decoded to str.
            if segment_name is not None:
              segment_name = segment_name.decode("utf8")
            assert isinstance(features, numpy.ndarray)
            if isinstance(targets, dict):
              targets = {key.decode("utf8"): value for (key, value) in targets.items()}
            self.add_new_data(
              numpy_copy_and_set_unused(features),
              numpy_copy_and_set_unused(targets),
              segment_name=segment_name)
          elif data_type == b"exit":
            have_seen_the_whole = True
            break
          else:
            assert False, "not handled: (%r, %r)" % (data_type, args)

      if self.seq_list_file:
        # Best-effort removal; a failure is only logged.
        try:
          os.remove(self.seq_list_file)
        except Exception as e:
          print("%s: error when removing %r: %r" % (self, self.seq_list_file, e), file=log.v5)
        finally:
          self.seq_list_file = None

      if not self.python_exit:
        with self.lock:
          self.finish_sprint_epoch(seen_all=have_seen_the_whole)
          if have_seen_the_whole:
            self._num_seqs = self.next_seq_to_be_added
      print("%s (proc %i) finished reading epoch %i, seen all %r (finished), num seqs %i" % (
        self, child_pid, epoch, have_seen_the_whole, seq_count), file=log.v5)

    except Exception as exc:
      if not self.python_exit:
        # Catch all standard exceptions.
        # Don't catch KeyboardInterrupt here because that will get sent by the main thread
        # when it is exiting. It's never by the user because SIGINT will always
        # trigger KeyboardInterrupt in the main thread only.
        try:
          # Finishing the epoch happens inside the try block so that a failure here
          # cannot prevent interrupt_main() from being called.
          if epoch == self.crnnEpoch:
            with self.lock:
              self.finish_sprint_epoch(seen_all=False)
          print("%s reader failed (%s)" % (self, exc), file=log.v1)
          sys.excepthook(*sys.exc_info())
          print("")
        finally:
          # Exceptions are fatal. If we can recover, we should handle it in run_inner().
          interrupt_main()
Esempio n. 8
0
  def _reader_thread_proc(self, child_pid, epoch):
    """
    Reader thread body: fetch items via ``self._read_next_raw()`` and add them as new data
    until an "exit" item arrives, the epoch changes, or the dataset/child is shut down.

    On failure (unless ``self.python_exit`` is set) the epoch is finished with
    ``seen_all=False``, the exception is reported, and the main thread is interrupted.

    :param int child_pid: only used for the final log message
    :param int epoch: epoch this reader belongs to; reading stops once ``self.crnnEpoch`` differs
    """
    try:
      self.add_data_thread_id = thread.get_ident()

      self.init_sprint_epoch(epoch)
      have_seen_the_whole = False

      seq_count = 0
      while not self.python_exit and self.child_pid:
        try:
          data_type, args = self._read_next_raw()
        except (IOError, EOFError):
          with self.lock:
            if epoch != self.crnnEpoch:
              # We have passed on to a new epoch. This is a valid reason that the child has been killed.
              break
            if self.python_exit or not self.child_pid:
              break
          raise

        with self.lock:
          if epoch != self.crnnEpoch:
            break
          if self.python_exit or not self.child_pid:
            break

          if data_type == b"data":
            seq_count += 1
            segment_name, features, targets = args
            # Names and dict keys arrive as bytes and are decoded to str.
            if segment_name is not None:
              segment_name = segment_name.decode("utf8")
            assert isinstance(features, numpy.ndarray)
            if isinstance(targets, dict):
              targets = {key.decode("utf8"): value for (key, value) in targets.items()}
            self.add_new_data(
              numpy_copy_and_set_unused(features),
              numpy_copy_and_set_unused(targets),
              segment_name=segment_name)
          elif data_type == b"exit":
            have_seen_the_whole = True
            break
          else:
            assert False, "not handled: (%r, %r)" % (data_type, args)

      if self.seq_list_file:
        # Best-effort removal; a failure is only logged.
        try:
          os.remove(self.seq_list_file)
        except Exception as e:
          print("%s: error when removing %r: %r" % (self, self.seq_list_file, e), file=log.v5)
        finally:
          self.seq_list_file = None

      if not self.python_exit:
        with self.lock:
          self.finish_sprint_epoch(seen_all=have_seen_the_whole)
          if have_seen_the_whole:
            self._num_seqs = self.next_seq_to_be_added
      print("%s (proc %i) finished reading epoch %i, seen all %r (finished), num seqs %i" % (
        self, child_pid, epoch, have_seen_the_whole, seq_count), file=log.v5)

    except Exception as exc:
      if not self.python_exit:
        # Catch all standard exceptions.
        # Don't catch KeyboardInterrupt here because that will get sent by the main thread
        # when it is exiting. It's never by the user because SIGINT will always
        # trigger KeyboardInterrupt in the main thread only.
        try:
          # Finishing the epoch happens inside the try block so that a failure here
          # cannot prevent interrupt_main() from being called.
          if epoch == self.crnnEpoch:
            with self.lock:
              self.finish_sprint_epoch(seen_all=False)
          print("%s reader failed (%s)" % (self, exc), file=log.v1)
          sys.excepthook(*sys.exc_info())
          print("")
        finally:
          # Exceptions are fatal. If we can recover, we should handle it in run_inner().
          interrupt_main()
Esempio n. 9
0
  def reader_thread_proc(self, child_pid, epoch):
    """
    Reader thread body: fetch items via ``self._read_next_raw()`` and add them as new data
    until an "exit" item arrives, the epoch changes, or ``self.python_exit`` is set.

    Any ``Exception`` is reported and the main thread is interrupted afterwards.

    Note: uses the print() function with ``file=``, which requires Python 3
    or ``from __future__ import print_function``.

    :param child_pid: not referenced in this function body
    :param epoch: epoch this reader belongs to; reading stops once ``self.crnnEpoch`` differs
    """
    try:
      self.add_data_thread_id = thread.get_ident()

      self.initSprintEpoch(epoch)
      haveSeenTheWhole = False

      while not self.python_exit:
        try:
          dataType, args = self._read_next_raw()
        except (IOError, EOFError):
          with self.lock:
            if epoch != self.crnnEpoch:
              # We have passed on to a new epoch. This is a valid reason that the child has been killed.
              break
            if self.python_exit:
              break
          raise

        with self.lock:
          if epoch != self.crnnEpoch:
            break
          if self.python_exit:
            break

          if dataType == "data":
            segmentName, features, targets = args
            self.addNewData(numpy_copy_and_set_unused(features), numpy_copy_and_set_unused(targets), segmentName=segmentName)
          elif dataType == "exit":
            haveSeenTheWhole = True
            break
          else:
            assert False, "not handled: (%r, %r)" % (dataType, args)

      if self.seq_list_file:
        # Best-effort removal; a failure is only logged.
        try:
          os.remove(self.seq_list_file)
        except Exception as e:
          print("ExternSprintDataset: error when removing %r: %r" % (self.seq_list_file, e), file=log.v5)
        finally:
          self.seq_list_file = None

      if not self.python_exit:
        with self.lock:
          self.finishSprintEpoch()
          if haveSeenTheWhole:
            self._num_seqs = self.next_seq_to_be_added
      print("ExternSprintDataset finished reading epoch %i" % epoch, file=log.v5)

    except Exception:
      # Catch all standard exceptions.
      # Don't catch KeyboardInterrupt here because that will get sent by the main thread
      # when it is exiting. It's never by the user because SIGINT will always
      # trigger KeyboardInterrupt in the main thread only.
      try:
        print("ExternSprintDataset reader failed", file=log.v1)
        sys.excepthook(*sys.exc_info())
        print("")
      finally:
        # Exceptions are fatal. If we can recover, we should handle it in run_inner().
        interrupt_main()