Esempio n. 1
0
def execute(config_path, training, version=None, render=False, debug=False, profile=False,
            random=False):
    """Load the config at `config_path` and run one trainer per evaluation it yields.

    `version`, `render`, `debug` and `training` are forwarded to `load_config`;
    `render`, `profile` and `random` are forwarded to each trainer's `execute`.
    The loop stops at the first halt request or failure.  Returns None.
    """
    config = load_config(config_path, version=version, render=render, debug=debug,
                         training=training)

    failed = False
    for evaluation_config in config:
        try:
            # build the trainer for this evaluation and run it right away
            load_trainer(evaluation_config).execute(render=render, profile=profile,
                                                    random=random)
        except (KeyboardInterrupt, SystemExit, bdb.BdbQuit):
            # a halt request is not treated as a failure
            log_warning("Evaluation halted by request.")
            break
        except Exception as exc:
            log_error(f"Evaluation failed: {exc}")
            traceback.print_exc()
            failed = True
            break

    # only successful local runs get the closing prompt
    if not config.local or failed:
        return

    # allow local runs to keep tensorboard alive
    if config.get("tensorboard", False):
        message = "Experiment complete, Tensorboard still alive.  Press any key to terminate..."
    else:
        message = "Experiment complete.  Press any key to terminate..."
    log(message)
    getch()  # presumably blocks until a key is pressed - defined outside this block
Esempio n. 2
0
    def stop_server(self):
        """Terminate the tracked tensorboard server, if any, and forget it.

        Does nothing when `self.server` is None, so repeated calls are harmless.
        """
        server = self.server
        if server is None:
            return

        log("Stopping tensorboard server")

        # clear the handle only after terminate() has been issued
        server.terminate()
        self.server = None
Esempio n. 3
0
    def load(self, load_checkpoint):
        """Restore the session from `load_checkpoint` through `self.saver`.

        Returns True when the restore call completes, and False when loading is
        disabled (`self.loading` falsy), no checkpoint is given, or the restore raises.
        """
        if not (self.loading and load_checkpoint):
            return False

        try:
            log(f"Loading model: {load_checkpoint}")
            self.saver.restore(self.sess, load_checkpoint)
        except Exception as exc:
            # a failed restore is reported to the caller instead of propagating
            log_error(f"Failed to load model: {exc}")
            return False
        return True
Esempio n. 4
0
    def save(self):
        """Save the session through `self.saver` and log how long it took.

        Nothing is saved unless `self.training` is truthy and `self.save_path` is
        not None.

        Returns:
            True if the saver was invoked, False if saving was skipped.
        """
        if not self.training or self.save_path is None:
            return False

        # perf_counter is monotonic, so the elapsed time cannot go negative
        started = time.perf_counter()
        save_path = self.saver.save(self.sess, self.save_path, global_step=self.global_step)
        # TODO - write_meta_graph=self.dirty_meta_graph could be passed above; unclear if needed
        self.dirty_meta_graph = False

        # ".2f" gives fixed-point seconds; the previous bare ".2" meant two significant
        # digits and rendered anything >= 10 secs in scientific notation (e.g. 1.2e+01)
        elapsed = time.perf_counter() - started
        log(f"Saved model: {save_path}  ({elapsed:.2f} secs)")
        return True
Esempio n. 5
0
    def start_server(self):
        """Launch a tensorboard subprocess unless one is already tracked in `self.server`.

        The child is started with SIGINT ignored, and `stop_server` is registered
        with `atexit` so the process gets terminated explicitly.
        """
        if self.server is not None:
            return

        def _mask_sigint():
            # tensorboard should ignore Ctrl-C interrupts, and only be terminated explicitly
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        logdir = self.config.tensorboard_path
        command = ["tensorboard", "--logdir", logdir]

        # NOTE(review): stdout/stderr are piped but nothing in this method reads them -
        # confirm a reader exists elsewhere, otherwise the child may stall on a full pipe
        self.server = Popen(command, preexec_fn=_mask_sigint, stdout=PIPE, stderr=PIPE)
        atexit.register(self.stop_server)

        # no port is passed to the command above, so the logged address assumes this
        # is the port tensorboard ends up serving on - TODO confirm
        assumed_port = 6006
        address = f"http://{self.config.ip}:{assumed_port}"
        log(f"Started tensorboard server: {address}  ({logdir})", color="white", bold=True)
Esempio n. 6
0
def _print_memory(mem, label):
    """Log the memory reading `mem` tagged with `label`.

    The message is handed to `log` together with "magenta" and `bold=True`.
    """
    message = f"Memory | {label} | {mem}"
    log(message, "magenta", bold=True)