Beispiel #1
0
def copy(source,
         destination,
         lazy=True,
         src_hash=None,
         dst_hash=None,
         checksum=md5_checksum):
    """Copy the file at *source* onto *destination*.

    When ``lazy`` is truthy and *destination* already exists as a file, the copy is skipped if ``is_duplicate``
    reports that both files have the same content. In every other case the file is copied unconditionally.

    Copying a path onto itself is a no-op.

    Args:
        source (PathLike): The file to copy.
        destination (PathLike): Where to copy the file to.
        lazy (bool): Optional. Default to ``True``. If set to ``True``, an existing *destination* is only
            overwritten when its content differs from *source*.
        src_hash (str): Optional. Default to ``None``. Forwarded to ``is_duplicate`` as the known checksum of
            *source*.
        dst_hash (str): Optional. Default to ``None``. Forwarded to ``is_duplicate`` as the known checksum of
            *destination*.
        checksum (Callable): A function which computes a checksum from a |Path|.

    Raises:
        OSError: If *source* is not a file, if *destination* exists but is not a file, or if something went wrong
            during copy.

    """
    src = Path(source)
    dst = Path(destination)

    # Nothing to do when both paths designate the same location.
    if src == dst:
        return

    if not src.is_file():
        raise OSError('Invalid source: {} is not a file.'.format(src))

    if dst.exists() and not dst.is_file():
        message = 'Invalid destination: {} exists but is not a file.'
        raise OSError(message.format(dst))

    # Laziness only applies when there is an existing file to compare against; a missing destination always
    # triggers an actual copy.
    if lazy and dst.is_file():
        if is_duplicate(src, dst, src_hash, dst_hash, checksum=checksum):
            return

    shutil.copy(str(src), str(dst))
Beispiel #2
0
def initialisation(path, checkpoint_reference=None, name=None):
    """Construct a valid PMF model initialisation from a |Path|.

    The return type depends on the initialisation type:

    * If the model was initialised from a PMF model, the function will return a |Model| instance.
    * If the model was initialised from a single file, the function will return a |Checkpoint|.

    Args:
        path (PathLike): A valid |Path| to the model initialisation.
        checkpoint_reference (hashable): If initialising from a PMF model, one needs to provide a reference to the
            actual model |Checkpoint| used to construct a valid initialisation.
        name (str): If initialising from a file, one needs to provide a name used to identify the initial network to
            construct a valid initialisation.

    Returns:
        |Model|, |Checkpoint|: The retrieved model initialisation.

    Raises:
        OSError: If the path provided does not exist on the filesystem or points to neither a file nor a PMF model
            (this includes existing paths which are neither a regular file nor a directory).
        ValueError: If the arguments provided are incompatible: e.g. using a PMF initialisation without a
            checkpoint_reference.
        PlumsModelTreeValidationError: If the path provided points to an invalid PMF model.
        PlumsModelMetadataValidationError: If the path provided points to a PMF model with an invalid metadata.

    """
    path = Path(path)
    if not path.exists():
        raise OSError('Invalid path provided: {} does not exists'.format(path))

    # Single-file initialisation: wrap the file in a named Checkpoint.
    if path.is_file():
        if name is None:
            raise ValueError(
                'Invalid arguments provided: {} points to a file but name is None.'
                .format(path))

        return Checkpoint(name, path)

    # PMF initialisation: a directory is considered a PMF model when it holds a metadata file.
    if path.is_dir() and (path / 'metadata.yaml').is_file():
        if checkpoint_reference is None:
            raise ValueError(
                'Invalid arguments provided: '
                '{} points to a PMF model but no checkpoint reference was given.'
                .format(path))

        model = Model.load(path, checkpoints=False)
        if checkpoint_reference not in model.checkpoint_collection:
            raise ValueError('Invalid arguments provided: '
                             '{} points to a PMF model which does not '
                             'contains {} as a checkpoint.'.format(
                                 path, checkpoint_reference))
        # Record which checkpoint of the loaded model serves as the initialisation.
        model._checkpoint = checkpoint_reference

        return model

    # Anything else (directory without metadata, device node, FIFO, socket...) is rejected explicitly instead of
    # silently falling through and returning None.
    raise OSError(
        'Invalid path provided: {} must either be a PMF model or a weight file.'
        .format(path))
Beispiel #3
0
    def save(self, path, force=False, **kwargs):
        """Save a |Model| to |Path|.

        The destination is first sanity-checked, then populated with a ``data`` directory (build parameters,
        checkpoints, initialisation), a copy of the producer configuration and, last, a ``metadata.yaml`` file
        describing everything which was written.

        Args:
            path (PathLike): The |Path| where to save.
            force (bool): Optional. Default to ``False``. If path already exists without a ``metadata.yaml`` file, or
                holds a PMF model with the same :attr:`id` which fails validation on load, do not raise and carry on
                saving. It is also forwarded when saving a |Model| initialisation.
            **kwargs: Only ``checkpoints`` is read. Default to ``True``. It is forwarded to ``Model.load`` when the
                destination already holds a PMF model and, when falsy, checkpoint files are not copied and their
                metadata ``path`` entry is set to ``'.'``.

        Raises:
            ValueError: If ``path`` points to a file.
            OSError: If ``path`` exists and:

                * It has no ``metadata.yaml`` file and ``force`` is ``False``.
                * Its ``metadata.yaml`` file fails metadata validation.
                * Its ``metadata.yaml`` file declares a different model :attr:`id`.
                * It holds a PMF model with the same :attr:`id` which fails validation on load and ``force`` is
                  ``False``.

        """
        # TODO: Improve docstring.
        path = Path(path)
        # Placeholder for the model found at the destination; it is only replaced below when an existing PMF model
        # with the same id is loaded.
        # NOTE(review): because this starts as a Mock() instance rather than None, every ``model_dst is not None``
        # test further down presumably always evaluates to True - confirm this is intended.
        model_dst = Mock()

        # sanity checks
        if path.exists():
            if path.is_file():
                raise ValueError('Invalid path: {} is a file.'.format(path))

            if (path / 'metadata.yaml').exists():
                with open(str(path / 'metadata.yaml'), 'r') as f:
                    metadata = yaml.safe_load(f)

                try:
                    metadata = Metadata().validate(metadata)
                except (SchemaError, PlumsValidationError):
                    # If the metadata file happens to be invalid, we might enter uncharted territories we are not
                    # prepared for. Abort !
                    raise OSError(
                        'Invalid path: {} is not a valid PMF metadata file.'.
                        format(path / 'metadata.yaml'))

                if metadata['model']['id'] != self.id:
                    # If the destination model id is different from ours, we might enter uncharted territories we are
                    # not prepared for. Abort !
                    raise OSError(
                        'Invalid path: {} has a different PMF model id '
                        '({} != {}).'.format(path / 'metadata.yaml', self.id,
                                             metadata['model']['id']))

                # Same model id: try to load the destination model so that files already in place can be compared
                # against the ones we are about to write.
                try:
                    model_dst = Model.load(path,
                                           checkpoints=kwargs.get(
                                               'checkpoints', True))
                except (SchemaError, PlumsValidationError):
                    if not force:
                        raise OSError(
                            'Invalid path: {} is an invalid PMF model '
                            'with the same model id ({}).'.format(
                                path / 'metadata.yaml', self.id))
                    # Use the insider fail-agnostic back door to load what we can from the model anyway
                    model_dst = Model._init_from_path(path, metadata)
                    # We remove PMF related elements as the previously written model is not valid. Note that if the
                    # deletion fails, we ignore it because a valid PMF model will be written anyway and we never
                    # assume the save destination to be empty.
                    rmtree(path,
                           ignore_errors=True,
                           black_list=('metadata',
                                       model_dst.producer.configuration))
            else:
                # The destination exists but carries no metadata file: only overwrite it on explicit request.
                if not force:
                    raise OSError(
                        'Invalid path: {} already exists.'.format(path))

        # Initialize destination
        path.mkdir(parents=True, exist_ok=True)

        # Prepare metadata dictionary
        # The 'checkpoints', 'initialisation' and 'configuration' entries are filled in further down as the
        # corresponding files are written.
        __metadata__ = {
            'format': {
                'version': self.__version__,
                'producer': {
                    'name': self.producer.name,
                    'version': {
                        'format': self.producer.version.format,
                        'value': self.producer.version.version
                    }
                }
            },
            'model': {
                'name': self.name,
                'id': self.id,
                'training': {
                    'status': self.training.status,
                    'start_epoch': self.training.start_epoch,
                    'start_time': self.training.start_timestamp,
                    'latest_epoch': self.training.latest_epoch,
                    'latest_time': self.training.latest_timestamp,
                    'end_epoch': self.training.end_epoch,
                    'end_time': self.training.end_timestamp,
                    'latest': self.checkpoint_collection.latest,
                    'checkpoints': {}
                },
                'initialisation': None,
                'configuration': {}
            }
        }

        # Initialize directory
        (path / 'data' / 'checkpoints').mkdir(parents=True, exist_ok=True)

        # Save build parameters
        # It should be a rather small file, so blindly overwriting it
        # should be faster than write-in-temp and lazy-copy
        with open(str(path / 'data' / 'build_parameters.yaml'), 'w') as f:
            yaml.safe_dump(self.build_parameters, f)

        # Copy configuration
        # NOTE(review): indexing the configuration path with [-1] presumably yields its last component (file name);
        # confirm against the project Path type.
        configuration_dst = path / self.producer.configuration[-1]
        copy(str(self.producer.configuration),
             str(configuration_dst),
             lazy=model_dst is not None)
        # Add configuration to metadata
        __metadata__['model']['configuration'].update({
            'path':
            str(configuration_dst.anchor_to_path(path)),
            'hash':
            md5_checksum(self.producer.configuration)
        })

        # Copy initialisation
        # Without any initialisation, only the (empty) directory is created and the metadata entry stays None.
        if self.initialisation is None:
            (path / 'data' / 'initialisation').mkdir(parents=True,
                                                     exist_ok=True)

        # File initialisation: copy the checkpoint file next to the model data.
        if isinstance(self.initialisation, Checkpoint):
            (path / 'data' / 'initialisation').mkdir(parents=True,
                                                     exist_ok=True)
            checkpoint_dst = path / 'data' / 'initialisation' / self.initialisation.path[
                -1]
            # NOTE(review): ``dst_hash`` receives the ``name`` attribute of the destination initialisation, not its
            # ``hash`` - confirm whether 'hash' was intended here.
            copy(str(self.initialisation.path),
                 str(checkpoint_dst),
                 lazy=model_dst is not None,
                 src_hash=self.initialisation.hash,
                 dst_hash=getattr(model_dst.initialisation, 'name', None))
            # Add file initialisation to metadata
            __metadata__['model']['initialisation'] = {
                'file': {
                    'name': str(self.initialisation.name),
                    'path': str(checkpoint_dst.anchor_to_path(path)),
                    'hash': self.initialisation.hash
                }
            }

        # PMF initialisation: recursively save the initialisation model, without its checkpoint files.
        if isinstance(self.initialisation, Model):
            self.initialisation.save(path / 'data' / 'initialisation',
                                     force=force,
                                     checkpoints=False)
            # Add PMF initialisation to metadata
            __metadata__['model']['initialisation'] = {
                'pmf': {
                    'name':
                    self.initialisation.name,
                    'id':
                    self.initialisation.id,
                    'path':
                    str((path / 'data' /
                         'initialisation').anchor_to_path(path)),
                    'checkpoint':
                    self.initialisation.checkpoint
                }
            }

        # Copy checkpoint_collection
        for reference, checkpoint in self.checkpoint_collection.items():
            # The destination is only computed when checkpoint files are actually copied.
            checkpoint_dst = path / 'data' / 'checkpoints' / checkpoint.path[-1] \
                if kwargs.get('checkpoints', True) else None
            # Add checkpoint to metadata
            __metadata__['model']['training']['checkpoints'][reference] = {
                'epoch':
                checkpoint.epoch,
                'path':
                str(checkpoint_dst.anchor_to_path(path)) if kwargs.get(
                    'checkpoints', True) else '.',
                'hash':
                checkpoint.hash
            }
            # If needed (usually), copy file to destination
            # NOTE(review): ``dst_hash`` receives whatever the destination collection's ``get`` returns for
            # ``checkpoint.name`` (the loop key is ``reference``) - confirm this is a checksum and the right key.
            if kwargs.get('checkpoints', True):
                copy(str(checkpoint.path),
                     str(checkpoint_dst),
                     lazy=model_dst is not None,
                     src_hash=checkpoint.hash,
                     dst_hash=model_dst.checkpoint_collection.get(
                         checkpoint.name))

        # Save metadata
        # Written last, once every entry of the dictionary has been filled in.
        with open(str(path / 'metadata.yaml'), 'w') as f:
            yaml.safe_dump(__metadata__, f)
Beispiel #4
0
class Checkpoint(object):
    """Python representation of a model checkpoint.

    A |Checkpoint| might be defined by the following parameters:

    * A :attr:`name`, a :attr:`path` and a :attr:`hash`.
    * A :attr:`name` and a :attr:`hash`.
    * A :attr:`name` and a :attr:`path`.

    Note that although the :attr:`epoch` is never needed to strictly define a |Checkpoint|, it is compulsory to inject
    it into a |CheckpointCollection|.

    Two |Checkpoint| compare equal (and hash alike) when they share the same :attr:`epoch` and :attr:`hash`; the
    :attr:`name` and :attr:`path` take no part in the comparison.

    Args:
        name (hashable): The |Checkpoint| unique identifier.
        path (Pathlike): Optional. Default to ``None``. The path to the |Checkpoint| data file.
        epoch (int): Optional. Default to ``None``. The |Checkpoint| epoch, if known.
        hash (str): Optional. Default to ``None``. The |Checkpoint| data file checksum. When omitted, it is computed
            from :attr:`path`.

    Attributes:
        name (hashable): The |Checkpoint| unique identifier.
        path (Pathlike): The path to the |Checkpoint| data file.
        epoch (int): The |Checkpoint| epoch, if known.
        hash (str): The |Checkpoint| data file checksum.

    Raises:
        OSError: If a ``path`` is provided but does not point to a file.

    """
    def __init__(self, name, path=None, epoch=None, hash=None):
        # Keep the raw ``path`` argument untouched: it is the one reported in the error message.
        data_file = None if path is None else Path(path)
        self.path = data_file

        if data_file is not None and not data_file.is_file():
            raise OSError('Invalid checkpoint: {} is not a file.'.format(path))

        self.name = name
        self.epoch = epoch

        # A provided checksum is validated, a missing one is computed from the data file.
        if hash is not None:
            self.hash = MD5Checksum().validate(hash)
        else:
            self.hash = md5_checksum(self.path)

    def __repr__(self):
        """Return a representation of a |Checkpoint|."""
        template = '{}(name={}, path={}, epoch={}, hash={})'
        class_name = self.__class__.__name__
        return template.format(class_name, self.name, self.path, self.epoch,
                               self.hash)

    __str__ = __repr__

    def __eq__(self, other):
        """Return whether two |Checkpoint| have the same :attr:`epoch` and the same :attr:`hash`.

        Args:
            other (|Checkpoint|): A |Checkpoint| to compare to.

        Returns:
            bool: ``True`` if both have the same :attr:`epoch` and the same :attr:`hash`. ``NotImplemented`` is
            returned when an attribute needed by the comparison is missing.

        """
        try:
            same_epoch = self.epoch == other.epoch
            return same_epoch and self.hash == other.hash
        except AttributeError:
            # Let Python fall back on the other operand's comparison.
            return NotImplemented

    def __ne__(self, other):
        """Return whether two |Checkpoint| differ by their :attr:`epoch` or their :attr:`hash`.

        Args:
            other (|Checkpoint|): A |Checkpoint| to compare to.

        Returns:
            bool: ``True`` if ``self == other`` does not hold.

        """
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """Return a valid hash for the |Checkpoint|.

        Returns:
            int: The hash of the (:attr:`epoch`, :attr:`hash`) tuple.

        """
        key = (self.epoch, self.hash)
        return hash(key)