def trainThreadFunc():
    """
    Thread entry point which initializes the engine for training and runs it.

    Takes no arguments; ``TargetMode``, ``engine``, ``config``, ``sprintDataset``
    and ``epoch`` are resolved from the surrounding scope.
    Any failure other than ``KeyboardInterrupt`` is reported and then the
    main thread is interrupted.
    """
    try:
        assert TargetMode
        if TargetMode == "target-alignment":
            pass  # Ok.
        elif TargetMode == "criterion-by-sprint":
            # TODO ...
            raise NotImplementedError
        else:
            raise Exception("target-mode not supported: %s" % TargetMode)

        engine.init_train_from_config(config, train_data=sprintDataset)

        # If some epoch is explicitly specified, it checks whether it matches.
        if epoch is not None:
            assert epoch == engine.start_epoch

        # Do the actual training.
        engine.train()

    except KeyboardInterrupt:
        # This happens at forced exit.
        pass

    except BaseException:
        # Catch all, even SystemExit. We must stop the main thread then.
        try:
            # Function-call form: valid with and without the print-function semantics.
            print("CRNN train failed")
            sys.excepthook(*sys.exc_info())
        finally:
            # Exceptions are fatal. Stop now, even if the reporting above failed.
            interrupt_main()
def run(self):
    """
    Thread body: calls ``self.run_inner()`` and reports failures explicitly.

    The wrapper exists for better exception printing, because
    ``Thread.__bootstrap_inner()`` ignores ``sys.excepthook``.
    """
    try:
        self.run_inner()

    except ProcConnectionDied:
        # Normally this should have been caught in run_inner() already, so it is
        # somewhat unexpected here. Only mention it when we are not exiting anyway.
        already_exited = getattr(sys, "exited", False)
        if not already_exited:
            print("%s. Some device proc crashed unexpectedly." % self, file=log.v4)
        # Just pass on. We have self.finalized == False which indicates the problem.

    except Exception:
        # All standard exceptions end up here. These are not device errors; they
        # should have been handled in the code, and self.finalized stays False.
        # KeyboardInterrupt is deliberately not caught: the main thread sends it
        # when it is exiting. It never comes from the user, because SIGINT always
        # triggers KeyboardInterrupt in the main thread only.
        try:
            print("%s failed" % self.name, file=log.v1)
            if log.v[4]:
                exc_type, exc_value, exc_tb = sys.exc_info()
                sys.excepthook(exc_type, exc_value, exc_tb)
                print("")
        finally:
            # Exceptions are fatal. Anything recoverable belongs in run_inner().
            interrupt_main()
def trainThreadFunc():
    """
    Thread entry point: validate the target mode, set up the engine, then train.

    ``TargetMode``, ``engine``, ``config``, ``sprintDataset`` and ``epoch`` come
    from the surrounding scope. On any failure except ``KeyboardInterrupt`` the
    error is printed and the main thread gets interrupted.
    """
    try:
        assert TargetMode
        if TargetMode == "criterion-by-sprint":
            # TODO ...
            raise NotImplementedError
        if TargetMode != "target-alignment":
            raise Exception("target-mode not supported: %s" % TargetMode)
        # Reaching this point means TargetMode == "target-alignment", which is ok.

        engine.init_train_from_config(config, train_data=sprintDataset)

        # An explicitly given epoch must match the epoch the engine starts at.
        assert epoch is None or epoch == engine.start_epoch

        # The actual training.
        engine.train()

    except KeyboardInterrupt:
        # Forced exit; nothing to report.
        pass

    except BaseException:
        # Everything else, even SystemExit: the main thread must be stopped then.
        try:
            print("CRNN train failed")
            exc_type, exc_value, exc_tb = sys.exc_info()
            sys.excepthook(exc_type, exc_value, exc_tb)
        finally:
            # Exceptions are fatal. Stop now.
            interrupt_main()
def run(self):
    """
    Thread body: wraps ``self.run_inner()`` for better exception printing.

    ``Thread.__bootstrap_inner()`` ignores ``sys.excepthook``, so failures are
    reported here explicitly. Any standard exception is treated as fatal and
    the main thread is interrupted.
    """
    try:
        self.run_inner()

    except ProcConnectionDied:
        if not getattr(sys, "exited", False):
            # Normally we should have caught that in run_inner(), so somewhat unexpected.
            print("%s. Some device proc crashed unexpectedly." % self, file=log.v4)
        # Just pass on. We have self.finalized == False which indicates the problem.

    except Exception:
        # Catch all standard exceptions.
        # These are not device errors. We should have caught them in the code
        # and we would leave self.finalized == False.
        # Don't catch KeyboardInterrupt here because that will get sent by the main thread
        # when it is exiting. It's never by the user because SIGINT will always
        # trigger KeyboardInterrupt in the main thread only.
        try:
            print("%s failed" % self.name, file=log.v1)
            if log.v[4]:
                sys.excepthook(*sys.exc_info())
                print("")
        finally:
            # Exceptions are fatal. If we can recover, we should handle it in run_inner().
            interrupt_main()
def _thread_main(self):
    """
    Loader thread body.

    First counts the lines of ``self._data_files["data"]`` and stores the count in
    ``self._data_len``. Then reads the "data" and "classes" files chunk-wise,
    converts every line via ``self._data_str_to_numpy`` and appends the results to
    ``self._data[key]``. Finally closes all files and sets the entries of
    ``self._data_files`` to None.

    Any ``Exception`` is reported through ``sys.excepthook`` and the main thread is
    interrupted afterwards.
    """
    from Util import interrupt_main
    try:
        import better_exchook
        better_exchook.install()

        # First iterate once over the data to get the data len as fast as possible.
        data_len = 0
        while True:
            ls = self._data_files["data"].readlines(10**4)
            data_len += len(ls)
            if not ls:
                break
        with self._lock:
            self._data_len = data_len
        self._data_files["data"].seek(0, os.SEEK_SET)  # we will read it again below

        # Now, read and use the vocab for a compact representation in memory.
        keys_to_read = ["data", "classes"]
        while True:
            # Iterate over a copy because exhausted keys are removed inside the loop.
            for k in list(keys_to_read):
                data_strs = self._data_files[k].readlines(10**6)
                if not data_strs:
                    # This file is exhausted; it must have as many entries as counted above.
                    assert len(self._data[k]) == self._data_len
                    keys_to_read.remove(k)
                    continue
                assert len(self._data[k]) + len(data_strs) <= self._data_len
                vocab = self._vocabs[k]
                data = [
                    self._data_str_to_numpy(vocab, s.decode("utf8").strip() + self._add_postfix[k])
                    for s in data_strs
                ]
                with self._lock:
                    self._data[k].extend(data)
            if not keys_to_read:
                break

        for k, f in list(self._data_files.items()):
            f.close()
            self._data_files[k] = None

    except Exception:
        try:
            sys.excepthook(*sys.exc_info())
        finally:
            # Exceptions are fatal. Interrupt the main thread even if reporting itself failed.
            interrupt_main()
def reader_thread_proc(self, child_pid, epoch):
    """
    Reader thread body: pulls items via ``self._read_next_raw()`` and feeds "data"
    items into ``self.addNewData`` until an "exit" item arrives, the epoch changes,
    ``self.python_exit`` is set or ``self.child_pid`` becomes false.

    :param child_pid: not referenced in this function; the loop checks ``self.child_pid``
    :param epoch: the epoch this reader was started for, compared against ``self.crnnEpoch``
    """
    try:
        self.add_data_thread_id = thread.get_ident()
        self.initSprintEpoch(epoch)
        saw_exit_item = False

        while not self.python_exit and self.child_pid:
            try:
                item_type, item_args = self._read_next_raw()
            except (IOError, EOFError):
                with self.lock:
                    # Having passed on to a new epoch is a valid reason that the child
                    # has been killed; so is our own exit or a vanished child.
                    if epoch != self.crnnEpoch or self.python_exit or not self.child_pid:
                        break
                raise

            with self.lock:
                if epoch != self.crnnEpoch or self.python_exit or not self.child_pid:
                    break

                if item_type == "data":
                    seg_name, feats, targs = item_args
                    feats_copy = numpy_copy_and_set_unused(feats)
                    targs_copy = numpy_copy_and_set_unused(targs)
                    self.addNewData(feats_copy, targs_copy, segmentName=seg_name)
                elif item_type == "exit":
                    saw_exit_item = True
                    break
                else:
                    assert False, "not handled: (%r, %r)" % (item_type, item_args)

        if self.seq_list_file:
            try:
                os.remove(self.seq_list_file)
            except Exception as err:
                print("ExternSprintDataset: error when removing %r: %r" % (self.seq_list_file, err), file=log.v5)
            finally:
                self.seq_list_file = None

        if not self.python_exit and self.child_pid:
            with self.lock:
                self.finishSprintEpoch()
                if saw_exit_item:
                    self._num_seqs = self.next_seq_to_be_added
        print("ExternSprintDataset finished reading epoch %i" % epoch, file=log.v5)

    except Exception:
        # Only standard exceptions are handled here. KeyboardInterrupt is left alone
        # because the main thread sends it when it is exiting; it never comes from the
        # user, since SIGINT always triggers KeyboardInterrupt in the main thread only.
        if self.python_exit:
            return
        try:
            print("ExternSprintDataset reader failed", file=log.v1)
            exc_type, exc_value, exc_tb = sys.exc_info()
            sys.excepthook(exc_type, exc_value, exc_tb)
            print("")
        finally:
            # Exceptions are fatal, so the main thread is stopped.
            interrupt_main()
def _reader_thread_proc(self, child_pid, epoch):
    """
    Reader thread body: pulls items via ``self._read_next_raw()`` and feeds
    ``b"data"`` items into ``self.add_new_data`` until a ``b"exit"`` item arrives,
    the epoch changes, ``self.python_exit`` is set or ``self.child_pid`` becomes false.

    On an unexpected exception (while not exiting), the failure is reported and
    the main thread is interrupted.

    :param int child_pid: only used for the final log message
    :param int epoch: the epoch this reader was started for, compared against ``self.crnnEpoch``
    """
    try:
        self.add_data_thread_id = thread.get_ident()
        self.init_sprint_epoch(epoch)
        have_seen_the_whole = False
        seq_count = 0

        while not self.python_exit and self.child_pid:
            try:
                data_type, args = self._read_next_raw()
            except (IOError, EOFError):
                with self.lock:
                    if epoch != self.crnnEpoch:
                        # We have passed on to a new epoch. This is a valid reason that the child has been killed.
                        break
                    if self.python_exit or not self.child_pid:
                        break
                raise

            with self.lock:
                if epoch != self.crnnEpoch:
                    break
                if self.python_exit or not self.child_pid:
                    break

                if data_type == b"data":
                    seq_count += 1
                    segment_name, features, targets = args
                    # Names and dict keys arrive as bytes and are decoded to str here.
                    if segment_name is not None:
                        segment_name = segment_name.decode("utf8")
                    assert isinstance(features, numpy.ndarray)
                    if isinstance(targets, dict):
                        targets = {key.decode("utf8"): value for (key, value) in targets.items()}
                    self.add_new_data(
                        numpy_copy_and_set_unused(features),
                        numpy_copy_and_set_unused(targets),
                        segment_name=segment_name)
                elif data_type == b"exit":
                    have_seen_the_whole = True
                    break
                else:
                    assert False, "not handled: (%r, %r)" % (data_type, args)

        if self.seq_list_file:
            try:
                os.remove(self.seq_list_file)
            except Exception as e:
                print("%s: error when removing %r: %r" % (self, self.seq_list_file, e), file=log.v5)
            finally:
                self.seq_list_file = None

        if not self.python_exit:
            with self.lock:
                self.finish_sprint_epoch(seen_all=have_seen_the_whole)
                if have_seen_the_whole:
                    self._num_seqs = self.next_seq_to_be_added
        print("%s (proc %i) finished reading epoch %i, seen all %r (finished), num seqs %i" % (
            self, child_pid, epoch, have_seen_the_whole, seq_count), file=log.v5)

    except Exception as exc:
        if not self.python_exit:
            # Catch all standard exceptions.
            # Don't catch KeyboardInterrupt here because that will get sent by the main thread
            # when it is exiting. It's never by the user because SIGINT will always
            # trigger KeyboardInterrupt in the main thread only.
            try:
                # Kept inside the try so that a failure while finishing the epoch
                # still reaches interrupt_main() below.
                if epoch == self.crnnEpoch:
                    with self.lock:
                        self.finish_sprint_epoch(seen_all=False)
                print("%s reader failed (%s)" % (self, exc), file=log.v1)
                sys.excepthook(*sys.exc_info())
                print("")
            finally:
                # Exceptions are fatal. Stop the main thread.
                interrupt_main()
def _reader_thread_proc(self, child_pid, epoch):
    """
    Reader thread body: pulls items via ``self._read_next_raw()`` and feeds
    ``b"data"`` items into ``self.add_new_data`` until a ``b"exit"`` item arrives,
    the epoch changes, ``self.python_exit`` is set or ``self.child_pid`` becomes false.

    On an unexpected exception (while not exiting), the failure is reported and
    the main thread is interrupted.

    :param int child_pid: only used for the final log message
    :param int epoch: the epoch this reader was started for, compared against ``self.crnnEpoch``
    """
    try:
        self.add_data_thread_id = thread.get_ident()
        self.init_sprint_epoch(epoch)
        have_seen_the_whole = False
        seq_count = 0

        while not self.python_exit and self.child_pid:
            try:
                data_type, args = self._read_next_raw()
            except (IOError, EOFError):
                with self.lock:
                    if epoch != self.crnnEpoch:
                        # We have passed on to a new epoch. This is a valid reason that the child has been killed.
                        break
                    if self.python_exit or not self.child_pid:
                        break
                raise

            with self.lock:
                if epoch != self.crnnEpoch:
                    break
                if self.python_exit or not self.child_pid:
                    break

                if data_type == b"data":
                    seq_count += 1
                    segment_name, features, targets = args
                    # Names and dict keys arrive as bytes and are decoded to str here.
                    if segment_name is not None:
                        segment_name = segment_name.decode("utf8")
                    assert isinstance(features, numpy.ndarray)
                    if isinstance(targets, dict):
                        targets = {key.decode("utf8"): value for (key, value) in targets.items()}
                    self.add_new_data(
                        numpy_copy_and_set_unused(features),
                        numpy_copy_and_set_unused(targets),
                        segment_name=segment_name)
                elif data_type == b"exit":
                    have_seen_the_whole = True
                    break
                else:
                    assert False, "not handled: (%r, %r)" % (data_type, args)

        if self.seq_list_file:
            try:
                os.remove(self.seq_list_file)
            except Exception as e:
                print("%s: error when removing %r: %r" % (self, self.seq_list_file, e), file=log.v5)
            finally:
                self.seq_list_file = None

        if not self.python_exit:
            with self.lock:
                self.finish_sprint_epoch(seen_all=have_seen_the_whole)
                if have_seen_the_whole:
                    self._num_seqs = self.next_seq_to_be_added
        print("%s (proc %i) finished reading epoch %i, seen all %r (finished), num seqs %i" % (
            self, child_pid, epoch, have_seen_the_whole, seq_count), file=log.v5)

    except Exception as exc:
        if not self.python_exit:
            # Catch all standard exceptions.
            # Don't catch KeyboardInterrupt here because that will get sent by the main thread
            # when it is exiting. It's never by the user because SIGINT will always
            # trigger KeyboardInterrupt in the main thread only.
            try:
                # Kept inside the try so that a failure while finishing the epoch
                # still reaches interrupt_main() below.
                if epoch == self.crnnEpoch:
                    with self.lock:
                        self.finish_sprint_epoch(seen_all=False)
                print("%s reader failed (%s)" % (self, exc), file=log.v1)
                sys.excepthook(*sys.exc_info())
                print("")
            finally:
                # Exceptions are fatal. Stop the main thread.
                interrupt_main()
def reader_thread_proc(self, child_pid, epoch):
    """
    Reader thread body: pulls items via ``self._read_next_raw()`` and feeds "data"
    items into ``self.addNewData`` until an "exit" item arrives, the epoch changes
    or ``self.python_exit`` is set.

    Any standard exception is reported and the main thread is interrupted.

    :param child_pid: not referenced in this function
    :param epoch: the epoch this reader was started for, compared against ``self.crnnEpoch``
    """
    try:
        self.add_data_thread_id = thread.get_ident()
        self.initSprintEpoch(epoch)
        haveSeenTheWhole = False

        while not self.python_exit:
            try:
                dataType, args = self._read_next_raw()
            except (IOError, EOFError):
                with self.lock:
                    if epoch != self.crnnEpoch:
                        # We have passed on to a new epoch. This is a valid reason that the child has been killed.
                        break
                    if self.python_exit:
                        break
                raise

            with self.lock:
                if epoch != self.crnnEpoch:
                    break
                if self.python_exit:
                    break

                if dataType == "data":
                    segmentName, features, targets = args
                    self.addNewData(
                        numpy_copy_and_set_unused(features),
                        numpy_copy_and_set_unused(targets),
                        segmentName=segmentName)
                elif dataType == "exit":
                    haveSeenTheWhole = True
                    break
                else:
                    assert False, "not handled: (%r, %r)" % (dataType, args)

        if self.seq_list_file:
            try:
                os.remove(self.seq_list_file)
            except Exception as e:
                print("ExternSprintDataset: error when removing %r: %r" % (self.seq_list_file, e), file=log.v5)
            finally:
                self.seq_list_file = None

        if not self.python_exit:
            with self.lock:
                self.finishSprintEpoch()
                if haveSeenTheWhole:
                    self._num_seqs = self.next_seq_to_be_added
        print("ExternSprintDataset finished reading epoch %i" % epoch, file=log.v5)

    except Exception:
        # Catch all standard exceptions.
        # Don't catch KeyboardInterrupt here because that will get sent by the main thread
        # when it is exiting. It's never by the user because SIGINT will always
        # trigger KeyboardInterrupt in the main thread only.
        try:
            print("ExternSprintDataset reader failed", file=log.v1)
            sys.excepthook(*sys.exc_info())
            print("")
        finally:
            # Exceptions are fatal. Stop the main thread.
            interrupt_main()