# Example 1
def main():
    """Command-line entry point.

    ``-l`` lists the available experiments; ``-e NAME ...`` lists the
    latest trials of the named experiments and launches TensorBoard for
    them.  With neither flag, prints usage.
    """
    lab = Lab(os.getcwd())

    parser = argparse.ArgumentParser(description='Run TensorBoard')
    parser.add_argument("-l",
                        action='store_true',
                        dest='list',
                        help='List all available experiments')
    parser.add_argument('-e',
                        required=False,
                        type=str,
                        nargs='+',
                        dest='experiments',
                        help='List of experiments')

    args = parser.parse_args()

    logger = Logger()

    if args.list:
        utils.list_experiments(lab, logger)
        return

    if not args.experiments:
        parser.print_usage()
        return

    # Show the chosen experiments; this fails if any are missing.
    last_trials = utils.get_last_trials(lab, args.experiments)
    utils.list_trials(last_trials, logger)

    # Hand off to TensorBoard.
    tensorboard_cmd = utils.get_tensorboard_cmd(lab, args.experiments)
    logger.log("Starting TensorBoard", color=colors.Style.bold)
    os.system(tensorboard_cmd)
# Example 2
class Experiment:
    """
    ## Experiment

    Each experiment has different configurations or algorithms.
    An experiment can have multiple trials.
    """

    # Variables to save/load with the numpy checkpoint format;
    # `None` until `set_variables` is called.
    __variables: Optional[List[tf.Variable]]

    def __init__(self, *,
                 lab: Lab,
                 name: str,
                 python_file: str,
                 comment: str,
                 check_repo_dirty: bool = True):
        """
        ### Create the experiment

        :param lab: reference to current lab
        :param name: name of the experiment
        :param python_file: `__file__` that invokes this. This is stored in
         the experiments list.
        :param comment: a short description of the experiment
        :param check_repo_dirty: whether not to start the experiment if
         there are uncommitted changes.

        The experiments log keeps track of `python_file`, `name`, `comment` as
         well as the git commit.

        Experiment maintains the locations of checkpoints, logs, etc.
        """

        self.__variables = None
        self.info = ExperimentInfo(lab, name)

        self.logger = Logger()
        self.check_repo_dirty = check_repo_dirty

        self.lab = lab

        if not tf.gfile.Exists(str(self.info.experiment_path)):
            tf.gfile.MakeDirs(str(self.info.experiment_path))

        self.trial = Trial.new_trial(
            python_file=python_file,
            trial_time=time.localtime(),
            comment=comment)

        repo = git.Repo(self.lab.path)

        # Record the exact code version this trial runs on
        self.trial.commit = repo.active_branch.commit.hexsha
        self.trial.commit_message = repo.active_branch.commit.message.strip()
        self.trial.is_dirty = repo.is_dirty()

    def print_info_and_check_repo(self):
        """
        ## 🖨 Print the experiment info and check git repo status
        """
        self.logger.log_color([
            (self.info.name, colors.Style.bold)
        ])
        self.logger.log_color([
            ("\t", None),
            (self.trial.comment, colors.BrightColor.cyan)
        ])
        self.logger.log_color([
            ("\t", None),
            ("[dirty]" if self.trial.is_dirty else "[clean]", None),
            (": ", None),
            (f"\"{self.trial.commit_message.strip()}\"", colors.BrightColor.orange)
        ])

        # Exit if git repository is dirty
        if self.check_repo_dirty and self.trial.is_dirty:
            self.logger.log("Cannot trial an experiment with uncommitted changes. ",
                            new_line=False)
            self.logger.log("[FAIL]", color=colors.BrightColor.red)
            exit(1)

    @util.deprecated("Use load_checkpoint_numpy")
    def load_checkpoint(self, session: tf.Session):
        """
        ## Load latest TensorFlow checkpoint

        **Use numpy array saving.**

        It's simpler and you can easily load subsets of
        variable.
        Or even manually swap variables between experiments with just
        file copies to try things out.

        :return: ``True`` if a checkpoint was restored, ``False`` if
         variables were freshly initialized instead.
        """
        if not _load_checkpoint(session, str(self.info.checkpoint_path)):
            tf_util.init_variables(session)
            return False
        else:
            return True

    @util.deprecated("Use save_checkpoint_numpy")
    def save_checkpoint(self, session: tf.Session, global_step: int):
        """
        ## Save TensorFlow checkpoint

        Use numpy array saving.
        """
        _delete_old_checkpoints(str(self.info.checkpoint_path))
        _save_checkpoint(session, str(self.info.checkpoint_path),
                         str(self.info.model_file), global_step)

    def load_checkpoint_numpy(self,
                              session: tf.Session):
        """
        ## Load model as a set of numpy arrays

        :return: ``True`` if a checkpoint was loaded, ``False`` if none exists.
        """

        checkpoints_path = pathlib.Path(self.info.checkpoint_path)
        # No checkpoint directory yet (fresh experiment): nothing to load.
        # Without this guard `iterdir()` raises FileNotFoundError.
        if not checkpoints_path.exists():
            return False

        # Checkpoints are stored in sub-directories named by global step;
        # pick the latest one
        max_step = -1
        for c in checkpoints_path.iterdir():
            max_step = max(max_step, int(c.name))

        if max_step < 0:
            return False

        checkpoint_path = checkpoints_path / str(max_step)

        # `info.json` maps variable names to .npy file names
        with open(str(checkpoint_path / "info.json"), "r") as f:
            files = json.loads(f.readline())

        # Load each variable
        for variable in self.__variables:
            file_name = files[variable.name]
            value = np.load(str(checkpoint_path / file_name))
            ph = tf.placeholder(value.dtype,
                                shape=value.shape,
                                name=f"{tf_util.strip_variable_name(variable.name)}_ph")

            assign_op = tf.assign(variable, ph)
            session.run(assign_op, feed_dict={ph: value})

        return True

    def save_checkpoint_numpy(self,
                              session: tf.Session,
                              global_step: int):
        """
        ## Save model as a set of numpy arrays

        Saves every variable registered via `set_variables` into
        `checkpoint_path/<global_step>/`, then removes older step
        directories.
        """

        checkpoints_path = pathlib.Path(self.info.checkpoint_path)
        if not checkpoints_path.exists():
            checkpoints_path.mkdir()

        checkpoint_path = checkpoints_path / str(global_step)
        assert not checkpoint_path.exists()

        checkpoint_path.mkdir()

        values = session.run(self.__variables)

        # Save each variable
        files = {}
        for variable, value in zip(self.__variables, values):
            file_name = tf_util.variable_name_to_file_name(
                tf_util.strip_variable_name(variable.name))
            file_name = f"{file_name}.npy"
            files[variable.name] = file_name

            np.save(str(checkpoint_path / file_name), value)

        # Save header
        with open(str(checkpoint_path / "info.json"), "w") as f:
            f.write(json.dumps(files))

        # Delete old checkpoints
        for c in checkpoints_path.iterdir():
            if c.name != checkpoint_path.name:
                util.rm_tree(c)

    def save_npy(self, array: np.ndarray, name: str):
        """
        ## Save a single numpy array

        This is used to save processed data
        """
        tf.gfile.MkDir(str(self.info.npy_path))
        file_name = name + ".npy"
        np.save(str(self.info.npy_path / file_name), array)

    def load_npy(self, name: str):
        """
        ## Load a single numpy array

        This is used to load processed data
        """
        file_name = name + ".npy"
        return np.load(str(self.info.npy_path / file_name))

    def clear_checkpoints(self):
        """
        ## Clear old checkpoints

        We run this when running a new fresh trial
        """
        if tf.gfile.Exists(str(self.info.checkpoint_path)):
            tf.gfile.DeleteRecursively(str(self.info.checkpoint_path))

    def clear_summaries(self):
        """
        ## Clear TensorBoard summaries

        We run this when running a new fresh trial
        """
        if tf.gfile.Exists(str(self.info.summary_path)):
            tf.gfile.DeleteRecursively(str(self.info.summary_path))

    def create_writer(self, session: tf.Session):
        """
        ## Create TensorFlow summary writer
        """
        self.logger.writer = tf.summary.FileWriter(str(self.info.summary_path), session.graph)

    def clear_screenshots(self):
        """
        ## Clear screenshots
        """
        path = str(self.info.screenshots_path)
        if tf.gfile.Exists(path):
            tf.gfile.DeleteRecursively(path)

        tf.gfile.MkDir(path)

    def save_screenshot(self, img, file_name: str):
        """
        ## Save screenshot

        Use this to save images
        """
        img.save(str(self.info.screenshots_path / file_name))

    def set_variables(self, variables: List[tf.Variable]):
        """
        ## Set variables for saving and loading
        """
        self.__variables = variables

    def _log_trial(self, is_add: bool):
        """
        ### Log trial

        This will add or update a trial in the `trials.yaml` file
        """
        try:
            with open(str(self.info.trials_log_file), "r") as file:
                trials = util.yaml_load(file.read())
                # An empty YAML file loads as `None`; normalize to a list
                if trials is None:
                    trials = []
        except FileNotFoundError:
            trials = []

        # Append when adding, or when there is nothing to update yet
        # (updating `trials[-1]` on an empty list would raise IndexError)
        if is_add or len(trials) == 0:
            trials.append(self.trial.to_dict())
        else:
            trials[-1] = self.trial.to_dict()

        with open(str(self.info.trials_log_file), "w") as file:
            file.write(util.yaml_dump(trials))

    def _log_python_file(self):
        """
        ### Add header to python source

        This will add or update trial information in python source file
        """
        with open(self.trial.python_file, "r") as file:
            lines = file.read().splitlines()

        trial_print = self.trial.pretty_print()

        lines = commenter.update(lines, trial_print)
        code = '\n'.join(lines)

        with open(self.trial.python_file, "w") as file:
            file.write(code)

    def save_progress(self, progress: Dict[str, str], is_add: bool):
        """
        ## Save experiment progress
        """
        self.trial.set_progress(progress, is_add)

        self._log_trial(is_add=False)
        self._log_python_file()

    def start(self, global_step: int, session: tf.Session):
        """
        ## Start experiment

        Load a checkpoint or reset based on `global_step`.
        """

        self.trial.start_step = global_step
        self._log_trial(is_add=True)
        self._log_python_file()

        if global_step > 0:
            # load checkpoint if we are starting from middle
            with self.logger.monitor("Loading checkpoint") as m:
                if self.__variables is None:
                    m.is_successful = self.load_checkpoint(session)
                else:
                    m.is_successful = self.load_checkpoint_numpy(session)
        else:
            # initialize variables and clear summaries if we are starting from scratch
            with self.logger.monitor("Clearing summaries"):
                self.clear_summaries()
            with self.logger.monitor("Clearing checkpoints"):
                self.clear_checkpoints()
            with self.logger.monitor("Initializing variables"):
                tf_util.init_variables(session)

        self.create_writer(session)
# Example 3
class Experiment:
    """
    ## Experiment

    Each experiment has different configurations or algorithms.
    An experiment can have multiple trials.
    """
    def __init__(self, *, name: str, python_file: str, comment: str,
                 check_repo_dirty: Optional[bool],
                 is_log_python_file: Optional[bool]):
        """
        ### Create the experiment

        :param name: name of the experiment
        :param python_file: `__file__` that invokes this. This is stored in
         the experiments list.
        :param comment: a short description of the experiment
        :param check_repo_dirty: whether not to start the experiment if
         there are uncommitted changes. ``None`` defers to the lab setting.
        :param is_log_python_file: whether to write trial info back into the
         python source. ``None`` defers to the lab setting.

        The experiments log keeps track of `python_file`, `name`, `comment` as
         well as the git commit.

        Experiment maintains the locations of checkpoints, logs, etc.
        """

        self.lab = Lab(python_file)

        # `None` means "use the lab-level defaults"
        if check_repo_dirty is None:
            check_repo_dirty = self.lab.check_repo_dirty
        if is_log_python_file is None:
            is_log_python_file = self.lab.is_log_python_file

        self.info = ExperimentInfo(self.lab, name)

        self.check_repo_dirty = check_repo_dirty

        experiment_path = pathlib.Path(self.info.experiment_path)
        if not experiment_path.exists():
            experiment_path.mkdir(parents=True)

        self.trial = Trial.new_trial(python_file=python_file,
                                     trial_time=time.localtime(),
                                     comment=comment)

        repo = git.Repo(self.lab.path)

        # Record the exact code version (commit + uncommitted diff)
        self.trial.commit = repo.head.commit.hexsha
        self.trial.commit_message = repo.head.commit.message.strip()
        self.trial.is_dirty = repo.is_dirty()
        self.trial.diff = repo.git.diff()
        self.__progress_saver = _ExperimentProgressSaver(
            trial=self.trial,
            trials_log_file=self.info.trials_log_file,
            is_log_python_file=is_log_python_file)

        checkpoint_saver = self._create_checkpoint_saver()
        self.logger = Logger(progress_saver=self.__progress_saver,
                             checkpoint_saver=checkpoint_saver)

    def _create_checkpoint_saver(self):
        # Subclasses override this to provide a framework-specific saver
        return None

    def print_info_and_check_repo(self):
        """
        ## 🖨 Print the experiment info and check git repo status
        """
        self.logger.log_color([(self.info.name, colors.Style.bold)])
        self.logger.log_color([("\t", None),
                               (self.trial.comment, colors.BrightColor.cyan)])
        self.logger.log_color([
            ("\t", None),
            ("[dirty]" if self.trial.is_dirty else "[clean]", None),
            (": ", None),
            (f"\"{self.trial.commit_message.strip()}\"",
             colors.BrightColor.orange)
        ])

        # Exit if git repository is dirty
        if self.check_repo_dirty and self.trial.is_dirty:
            self.logger.log(
                "Cannot trial an experiment with uncommitted changes. ",
                new_line=False)
            self.logger.log("[FAIL]", color=colors.BrightColor.red)
            exit(1)

    def save_npy(self, array: np.ndarray, name: str):
        """
        ## Save a single numpy array

        This is used to save processed data
        """
        npy_path = pathlib.Path(self.info.npy_path)
        # exist_ok so a second save doesn't raise FileExistsError
        npy_path.mkdir(parents=True, exist_ok=True)
        file_name = name + ".npy"
        np.save(str(self.info.npy_path / file_name), array)

    def load_npy(self, name: str):
        """
        ## Load a single numpy array

        This is used to load processed data
        """
        file_name = name + ".npy"
        return np.load(str(self.info.npy_path / file_name))

    def clear_checkpoints(self):
        """
        ## Clear old checkpoints

        We run this when running a new fresh trial
        """
        path = pathlib.Path(self.info.checkpoint_path)
        if path.exists():
            util.rm_tree(path)

    def clear_summaries(self):
        """
        ## Clear TensorBoard summaries

        We run this when running a new fresh trial
        """
        path = pathlib.Path(self.info.summary_path)
        if path.exists():
            util.rm_tree(path)

    def clear_screenshots(self):
        """
        ## Clear screenshots
        """
        path = pathlib.Path(self.info.screenshots_path)
        if path.exists():
            util.rm_tree(path)

        path.mkdir(parents=True)

    def save_screenshot(self, img, file_name: str):
        """
        ## Save screenshot

        Use this to save images
        """
        img.save(str(self.info.screenshots_path / file_name))

    def _start(self):
        # Persist the trial entry, then store the uncommitted diff so the
        # exact code state can be reconstructed later
        self.__progress_saver.save()

        path = pathlib.Path(self.info.diff_path)
        if not path.exists():
            path.mkdir(parents=True)

        with open(str(path / f"{self.trial.index}.diff"), "w") as f:
            f.write(self.trial.diff)
# Example 4
class Experiment:
    """
    ## Experiment

    Each run is an experiment.
    """

    def __init__(self, *,
                 lab: Lab,
                 name: str,
                 run_file: str,
                 comment: str,
                 check_repo_dirty: bool = True):
        """
        ### Create the experiment

        :param lab: reference to current lab
        :param name: name of the experiment
        :param run_file: `__file__` that invokes this. This is stored in
         the experiments list.
        :param comment: a short description of the experiment
        :param check_repo_dirty: whether not to start the experiment if
         there are uncommitted changes.

        The experiments log keeps track of `run_file`, `name`, `comment` as
         well as the git commit.

        Experiment maintains the locations of checkpoints, logs, etc.

        Exits the process (status 1) if the git repo is dirty and
        `check_repo_dirty` is set.
        """

        self.name = name
        self.logger = Logger()
        self.check_repo_dirty = check_repo_dirty

        self.lab = lab
        self.experiment_path = lab.experiments / name

        if not tf.gfile.Exists(str(self.experiment_path)):
            tf.gfile.MakeDirs(str(self.experiment_path))

        # Path layout under the experiment directory:
        #   checkpoints/  npy/  log/  screenshots/  runs.txt
        checkpoint_path = self.experiment_path / "checkpoints"
        self.checkpoint_path = str(checkpoint_path)
        self.npy_path = self.experiment_path / "npy"
        self.model_file = str(checkpoint_path / 'model')

        self.summary_path = str(self.experiment_path / "log")
        self.screenshots_path = self.experiment_path / 'screenshots'

        # `_log_run` returns False only when the repo is dirty and
        # `check_repo_dirty` is set; in that case refuse to run.
        if not self._log_run(run_file, comment):
            self.logger.log("Cannot run an experiment with uncommitted changes. ", new_line=False)
            self.logger.log("[FAIL]", color=colors.BrightColor.red)
            exit(1)

    def _log_run(self, run_file, comment):
        """
        Append a JSON line describing this run to `runs.txt`.

        Returns False (without logging) when the repo is dirty and
        `check_repo_dirty` is set; True otherwise.
        """
        repo = git.Repo(self.lab.path)
        if self.check_repo_dirty and repo.is_dirty():
            return False

        log_file = str(self.experiment_path / "runs.txt")
        t = time.localtime()

        # One JSON object per line, appended per run
        with open(log_file, "a+") as file:
            data = {
                'commit': repo.active_branch.commit.hexsha,
                'run_file': run_file,
                # NOTE(review): date/time fields are not zero-padded
                'date': "{}-{}-{}".format(t.tm_year, t.tm_mon, t.tm_mday),
                "time": "{}:{}:{}".format(t.tm_hour, t.tm_min, t.tm_sec),
                "comment": comment,
                # NOTE(review): message is not stripped here, unlike other
                # variants of this class — trailing newline ends up in the log
                'commit_message': repo.active_branch.commit.message
            }

            file.write("{}\n".format(json.dumps(data)))

        return True

    def load_checkpoint(self, session: tf.Session):
        """
        Load latest TensorFlow checkpoint.

        Returns True if a checkpoint was restored; otherwise initializes
        all variables and returns False.
        """
        if not _load_checkpoint(session, self.checkpoint_path):
            tf_util.init_variables(session)
            return False
        else:
            return True

    def save_checkpoint(self, session: tf.Session, global_step: int):
        """
        Save a TensorFlow checkpoint, removing older ones first.
        """
        _delete_old_checkpoints(self.checkpoint_path)
        _save_checkpoint(session, self.checkpoint_path, self.model_file, global_step)

    def save_npy(self, array: np.ndarray, name: str):
        """
        Save a numpy array to `npy/<name>.npy`.
        """
        tf.gfile.MkDir(str(self.npy_path))
        file_name = name + ".npy"
        np.save(str(self.npy_path / file_name), array)

    def load_npy(self, name: str):
        """
        Load the numpy array stored at `npy/<name>.npy`.
        """
        file_name = name + ".npy"
        return np.load(str(self.npy_path / file_name))

    def clear_checkpoints(self):
        """
        Delete the checkpoint directory, if it exists.
        """
        if tf.gfile.Exists(self.checkpoint_path):
            tf.gfile.DeleteRecursively(self.checkpoint_path)

    def clear_summaries(self):
        """
        Delete the TensorBoard summary directory, if it exists.
        """
        if tf.gfile.Exists(self.summary_path):
            tf.gfile.DeleteRecursively(self.summary_path)

    def create_writer(self, session: tf.Session):
        """
        Create a TensorFlow summary writer and attach it to the logger.
        """
        self.logger.writer = tf.summary.FileWriter(self.summary_path, session.graph)

    def clear_screenshots(self):
        """
        Delete and recreate the screenshots directory.
        """
        path = str(self.screenshots_path)
        if tf.gfile.Exists(path):
            tf.gfile.DeleteRecursively(path)

        tf.gfile.MkDir(path)

    def save_screenshot(self, img, file_name: str):
        """
        Save an image (PIL-style object with `.save`) as a screenshot.
        """
        img.save(str(self.screenshots_path / file_name))

    def start(self, global_step: int, session: tf.Session):
        """
        Start by either loading a checkpoint (`global_step > 0`) or
        resetting summaries, checkpoints and variables, then create
        the summary writer.
        """
        if global_step > 0:
            # load checkpoint if we are starting from middle
            with self.logger.monitor("Loading checkpoint") as m:
                m.is_successful = self.load_checkpoint(session)
        else:
            # initialize variables and clear summaries if we are starting from scratch
            with self.logger.monitor("Clearing summaries"):
                self.clear_summaries()
            with self.logger.monitor("Clearing checkpoints"):
                self.clear_checkpoints()
            with self.logger.monitor("Initializing variables"):
                tf_util.init_variables(session)

        self.create_writer(session)
# Example 5
class Experiment:
    """
    ## Experiment

    Each experiment has different configurations or algorithms.
    An experiment can have multiple trials.
    """
    def __init__(self,
                 *,
                 lab: Lab,
                 name: str,
                 python_file: str,
                 comment: str,
                 check_repo_dirty: bool = True):
        """
        ### Create the experiment

        :param lab: reference to current lab
        :param name: name of the experiment
        :param python_file: `__file__` that invokes this. This is stored in
         the experiments list.
        :param comment: a short description of the experiment
        :param check_repo_dirty: whether not to start the experiment if
         there are uncommitted changes.

        The experiments log keeps track of `python_file`, `name`, `comment` as
         well as the git commit.

        Experiment maintains the locations of checkpoints, logs, etc.
        """

        self.__variables = None
        self.info = ExperimentInfo(lab, name)

        self.logger = Logger()
        self.check_repo_dirty = check_repo_dirty

        self.lab = lab

        experiment_path = pathlib.Path(self.info.experiment_path)
        if not experiment_path.exists():
            experiment_path.mkdir(parents=True)

        self.trial = Trial.new_trial(python_file=python_file,
                                     trial_time=time.localtime(),
                                     comment=comment)

        repo = git.Repo(self.lab.path)

        # Record the exact code version this trial runs on
        self.trial.commit = repo.active_branch.commit.hexsha
        self.trial.commit_message = repo.active_branch.commit.message.strip()
        self.trial.is_dirty = repo.is_dirty()

    def print_info_and_check_repo(self):
        """
        ## 🖨 Print the experiment info and check git repo status
        """
        self.logger.log_color([(self.info.name, colors.Style.bold)])
        self.logger.log_color([("\t", None),
                               (self.trial.comment, colors.BrightColor.cyan)])
        self.logger.log_color([
            ("\t", None),
            ("[dirty]" if self.trial.is_dirty else "[clean]", None),
            (": ", None),
            (f"\"{self.trial.commit_message.strip()}\"",
             colors.BrightColor.orange)
        ])

        # Exit if git repository is dirty
        if self.check_repo_dirty and self.trial.is_dirty:
            self.logger.log(
                "Cannot trial an experiment with uncommitted changes. ",
                new_line=False)
            self.logger.log("[FAIL]", color=colors.BrightColor.red)
            exit(1)

    def save_npy(self, array: np.ndarray, name: str):
        """
        ## Save a single numpy array

        This is used to save processed data
        """
        npy_path = pathlib.Path(self.info.npy_path)
        # exist_ok so a second save doesn't raise FileExistsError
        npy_path.mkdir(parents=True, exist_ok=True)
        file_name = name + ".npy"
        np.save(str(self.info.npy_path / file_name), array)

    def load_npy(self, name: str):
        """
        ## Load a single numpy array

        This is used to load processed data
        """
        file_name = name + ".npy"
        return np.load(str(self.info.npy_path / file_name))

    def clear_checkpoints(self):
        """
        ## Clear old checkpoints

        We run this when running a new fresh trial
        """
        path = pathlib.Path(self.info.checkpoint_path)
        if path.exists():
            util.rm_tree(path)

    def clear_summaries(self):
        """
        ## Clear TensorBoard summaries

        We run this when running a new fresh trial
        """
        path = pathlib.Path(self.info.summary_path)
        if path.exists():
            util.rm_tree(path)

    def clear_screenshots(self):
        """
        ## Clear screenshots
        """
        path = pathlib.Path(self.info.screenshots_path)
        if path.exists():
            util.rm_tree(path)

        path.mkdir(parents=True)

    def save_screenshot(self, img, file_name: str):
        """
        ## Save screenshot

        Use this to save images
        """
        img.save(str(self.info.screenshots_path / file_name))

    def _log_trial(self, is_add: bool):
        """
        ### Log trial

        This will add or update a trial in the `trials.yaml` file
        """
        try:
            with open(str(self.info.trials_log_file), "r") as file:
                trials = util.yaml_load(file.read())
                # An empty YAML file loads as `None`; normalize to a list
                if trials is None:
                    trials = []
        except FileNotFoundError:
            trials = []

        # Append when adding, or when there is nothing to update yet
        if is_add or len(trials) == 0:
            trials.append(self.trial.to_dict())
        else:
            trials[-1] = self.trial.to_dict()

        with open(str(self.info.trials_log_file), "w") as file:
            file.write(util.yaml_dump(trials))

    def _log_python_file(self):
        """
        ### Add header to python source

        This will add or update trial information in python source file
        """

        try:
            with open(self.trial.python_file, "r") as file:
                lines = file.read().splitlines()

            trial_print = self.trial.pretty_print()

            lines = commenter.update(lines, trial_print)
            code = '\n'.join(lines)

            with open(self.trial.python_file, "w") as file:
                file.write(code)
        except FileNotFoundError:
            # Best effort: e.g. running from a REPL where the source
            # file does not exist
            pass

    def save_progress(self, progress: Dict[str, str]):
        """
        ## Save experiment progress
        """
        self.trial.set_progress(progress)

        self._log_trial(is_add=False)
        self._log_python_file()