Exemple #1
0
    def __init__(self,
                 model_name: str,
                 filename: str,
                 configuration: dict,
                 run_number: int,
                 project_dir: str,
                 continue_training=False,
                 seed: int = None,
                 device: str = 'cuda',
                 parameters=None):
        self._model_name = model_name
        self.filename = filename
        self.device = device
        self.continue_training = continue_training
        self.run_number = run_number
        self.seed = seed if seed is not None else (dtm.date.today().year +
                                                   self.run_number)

        self.contents = configuration
        self.project_dir = project_dir

        self.command_descriptors = self.contents.get('commands', [])

        # This one is special and needs to get removed
        if 'commands' in self.contents:
            del self.contents['commands']

        self.provider = Provider(self._prepare_environment(),
                                 {'model_config': self},
                                 parameters=parameters)
Exemple #2
0
class ModelConfig:
    """
    Read from YAML configuration of a model, specifying all details of the run.
    Is a frontend for the provider, resolving all dependency-injection requests.
    """

    PROJECT_FILE_NAME = '.velproject.yaml'

    @staticmethod
    def find_project_directory(start_path) -> str:
        """ Locate top-level project directory  """
        start_path = os.path.realpath(start_path)
        possible_name = os.path.join(start_path, ModelConfig.PROJECT_FILE_NAME)

        if os.path.exists(possible_name):
            return start_path
        else:
            up_path = os.path.realpath(os.path.join(start_path, '..'))
            if os.path.realpath(start_path) == up_path:
                raise RuntimeError(f"Couldn't find project file starting from {start_path}")
            else:
                return ModelConfig.find_project_directory(up_path)

    @classmethod
    def from_file(cls, filename: str, run_number: int, reset=False, seed: int=None, device: str='cuda', params=None):
        """ Create model config from file """
        with open(filename, 'r') as fp:
            model_config_contents = Parser.parse(fp)

        project_config_path = ModelConfig.find_project_directory(os.path.dirname(os.path.abspath(filename)))

        with open(os.path.join(project_config_path, cls.PROJECT_FILE_NAME), 'r') as fp:
            project_config_contents = Parser.parse(fp)

        aggregate_dictionary = {
            **project_config_contents,
            **model_config_contents
        }

        # Options that should exist for every config
        try:
            model_name = model_config_contents['name']
        except KeyError:
            raise VelInitializationException("Model configuration must have a 'name' key")

        return ModelConfig(
            model_name=model_name,
            filename=filename,
            configuration=aggregate_dictionary,
            run_number=run_number,
            project_dir=project_config_path,
            reset=reset,
            seed=seed,
            device=device,
            parameters=params
        )

    @classmethod
    def from_memory(cls, model_name: str, model_data: dict, run_number: int, project_dir: str,
                    reset=False, seed: int=None, device: str='cuda', params=None):
        """ Create model config from supplied data """
        return ModelConfig(
            model_name=model_name,
            filename="[memory]",
            configuration=model_data,
            run_number=run_number,
            project_dir=project_dir,
            reset=reset,
            seed=seed,
            device=device,
            parameters=params
        )

    def __init__(self, model_name: str, filename: str, configuration: dict, run_number: int, project_dir: str,
                 reset=False, seed: int=None, device: str='cuda', parameters=None):
        self._model_name = model_name
        self.filename = filename
        self.device = device
        self.reset = reset
        self.run_number = run_number
        self.seed = seed if seed is not None else (dtm.date.today().year + self.run_number)

        self.contents = configuration
        self.project_dir = project_dir

        self.command_descriptors = self.contents.get('commands', [])

        # This one is special and needs to get removed
        if 'commands' in self.contents:
            del self.contents['commands']

        self.provider = Provider(self._prepare_environment(), {'model_config': self}, parameters=parameters)

    def _prepare_environment(self) -> dict:
        """ Return full environment for dependency injection """
        return {**self.contents, 'run_number': self.run_number}

    def render_configuration(self) -> dict:
        """ Return a nice and picklable run configuration """
        return self.provider.render_configuration()

    ####################################################################################################################
    # COMMAND UTILITIES
    def get_command(self, command_name):
        """ Return object for given command """
        return self.provider.instantiate_from_data(self.command_descriptors[command_name])

    def run_command(self, command_name, varargs):
        """ Instantiate model class """
        command_descriptor = self.get_command(command_name)
        return command_descriptor.run(*varargs)

    ####################################################################################################################
    # MODEL DIRECTORIES
    def checkpoint_dir(self, *args) -> str:
        """ Return checkpoint directory for this model """
        return self.output_dir('checkpoints', self.run_name, *args)

    def data_dir(self, *args) -> str:
        """ Return data directory for given dataset """
        return self.project_data_dir(*args)

    def openai_dir(self) -> str:
        """ Return directory for openai output files for this model """
        return self.output_dir('openai', self.run_name)

    def project_data_dir(self, *args) -> str:
        """ Directory where to store data """
        return os.path.join(self.project_dir, 'data', *args)

    def output_dir(self, *args) -> str:
        """ Directory where to store output """
        return os.path.join(self.project_dir, 'output', *args)

    def project_top_dir(self, *args) -> str:
        """ Project top-level directory """
        return os.path.join(self.project_dir, *args)

    ####################################################################################################################
    # NAME UTILITIES
    @property
    def run_name(self) -> str:
        """ Return name of the run """
        return "{}/{}".format(self._model_name, self.run_number)

    @property
    def name(self) -> str:
        """ Return name of the model """
        return self._model_name

    ####################################################################################################################
    # PROVIDER API
    def provide(self, name):
        """ Return a dependency-injected instance """
        return self.provider.instantiate_by_name(name)

    ####################################################################################################################
    # BANNERS - Maybe shouldn't be here, but they are for now
    def banner(self, command_name) -> None:
        """ Print a banner for running the system """
        device = torch.device(self.device)
        print("=" * 80)
        print(f"Pytorch version: {torch.__version__} cuda version {torch.version.cuda} cudnn version {torch.backends.cudnn.version()}")
        print("Running model {}, run {} -- command {} -- device {}".format(self._model_name, self.run_number, command_name, self.device))
        if device.type == 'cuda':
            device_idx = 0 if device.index is None else device.index
            print(f"CUDA Device name {torch.cuda.get_device_name(device_idx)}")
        print(dtm.datetime.now().strftime("%Y/%m/%d - %H:%M:%S"))
        print("=" * 80)

    def quit_banner(self) -> None:
        """ Print a banner for running the system """
        print("=" * 80)
        print("Done.")
        print(dtm.datetime.now().strftime("%Y/%m/%d - %H:%M:%S"))
        print("=" * 80)

    ####################################################################################################################
    # Small UI utils
    def __repr__(self):
        return f"<ModelConfig at {self.filename}>"