def get_or_create(
    cls,
    source: typing.Union[str, pathlib.Path, typing.BinaryIO],
    filename: str = None,
    pair_style: str = None,
    species: list = None,
    atom_style: str = None,
    units: str = None,
    extra_tags: dict = None,
):
    """Return the LAMMPS potential node matching ``source``, creating one if none exists.

    The md5 checksum of the source is looked up among the nodes of exactly this class
    (subclasses are excluded). The first match is returned; when there is no match a
    new instance is built from the source.

    :param source: the source potential content, either a binary stream, or a ``str`` or ``Path`` to the path of
        the file on disk, which can be relative or absolute.
    :param filename: optional explicit filename to give to the file stored in the repository.
    :param pair_style: Type of potential according to LAMMPS
    :type pair_style: str
    :param species: Species that can be used for this potential.
    :type species: list
    :param atom_style: Type of treatment of the atoms according to LAMMPS.
    :type atom_style: str
    :param units: Default units to be used with this potential.
    :type units: str
    :param extra_tags: Dictionary with extra information to tag the potential, based on the KIM schema.
    :type extra_tags: dict
    :return: instance of ``LammpsPotentialData``, stored if taken from database, unstored otherwise.
    :raises TypeError: if the source is not a ``str``, ``pathlib.Path`` instance or binary stream.
    :raises FileNotFoundError: if the source is a filepath but does not exist.
    """
    # pylint: disable=too-many-arguments
    source = cls.prepare_source(source)

    query = orm.QueryBuilder()
    md5_filter = {f'attributes.{cls._key_md5}': md5_from_filelike(source)}
    query.append(cls, subclassing=False, filters=md5_filter)

    match = query.first()
    if match:
        return match[0]

    # No node with this checksum was found. The metadata is assigned on the class
    # before instantiating, since the constructor call below only receives the
    # source and the filename.
    # NOTE(review): these assignments stay on the class after this call returns --
    # confirm that later instantiations are not meant to see fresh defaults.
    cls.pair_style = pair_style
    cls.species = species
    cls.atom_style = atom_style
    cls.units = units
    cls.extra_tags = extra_tags

    # Computing the checksum consumed the stream, so rewind before handing it over.
    source.seek(0)
    return cls(source, filename)
def set_file(self, source, filename=None):  #pylint: disable=arguments-differ
    """
    Attach the ion file to the node and record the quantities parsed from it.

    This is called in the __init__ of SingleFileData and accepts both an absolute
    path and a file stream. A possible path is converted into a stream right after
    the parent call, so that everything else in this method only deals with streams.

    Please note that this approach is problematic for subclasses having `IonData`
    as a parent: the call to super does not return anything, hence it is not
    possible to write ``source = super().set_file(source, filename, **kwargs)``.

    :param source: an absolute path of a file on disk or a filelike object.
    :param filename: optional explicit filename to give to the file stored in the repository.
    """
    # The parent checks the input and sets the file and filename as attributes.
    super().set_file(source, filename)

    # Transform a possible absolute path into a stream and start from its beginning.
    stream = self._prepare_source(source)
    stream.seek(0)

    # The stream is rewound after every full read so the next consumer starts at zero.
    self.set_attribute('md5', md5_from_filelike(stream))
    stream.seek(0)

    ion_info = parse_ion(stream)
    stream.seek(0)

    # These three entries are always set from the parsed data.
    for key in ('element', 'name', 'atomic_number'):
        self.set_attribute(key, ion_info[key])

    # The mass is only recorded when the parser returned a value for it.
    mass = ion_info["mass"]
    if mass is not None:
        self.set_attribute('mass', mass)
def store(self, **kwargs):  #pylint: disable=arguments-differ
    """
    Store the node after refreshing the `element` and `md5` attributes from the file.

    The file is parsed again and its checksum recomputed so that both attributes
    reflect the current content. A node that is already stored is returned untouched.

    :return: the node itself if already stored, otherwise the result of the parent ``store``.
    :raises ParsingError: if no element could be parsed from the PSF file.
    """
    from aiida.common.exceptions import ParsingError
    from aiida.common.files import md5_from_filelike

    if self.is_stored:
        return self

    with self.open(mode='r') as text_handle:
        psf_info = parse_psf(text_handle)

    # The md5 checksum has to be generated from the content opened in binary mode.
    with self.open(mode='rb') as binary_handle:
        checksum = md5_from_filelike(binary_handle)

    try:
        element = psf_info['element']
    except KeyError:
        raise ParsingError("No 'element' parsed in the PSF file {};" " unable to store".format(self.filename))

    self.set_attribute('element', str(element))
    self.set_attribute('md5', checksum)

    return super().store(**kwargs)
def store(self):
    """Write the in-memory configuration to its file on disk.

    .. note:: if the configuration file already exists on disk and its contents differ from those in memory, a
        backup of the original file on disk will be created before overwriting it.

    :return: self
    """
    from aiida.common.files import md5_from_filelike, md5_file
    from .settings import DEFAULT_CONFIG_INDENT_SIZE

    # Only an existing file can require a backup; a missing one is simply written below.
    if os.path.isfile(self.filepath):
        # Serialize the current content into a temporary file and compare its md5
        # checksum with the one of the file on disk.
        # NOTE(review): ``NamedTemporaryFile()`` opens in binary mode by default, so this
        # relies on the ``json`` module in scope accepting a binary handle -- confirm.
        with tempfile.NamedTemporaryFile() as scratch:
            json.dump(self.dictionary, scratch, indent=DEFAULT_CONFIG_INDENT_SIZE)
            scratch.seek(0)

            content_differs = md5_from_filelike(scratch) != md5_file(self.filepath)
            if content_differs:
                self._backup(self.filepath)

    self._atomic_write()

    return self
def set_file(self, file):
    """Store the file in the repository and parse it to set the `element` and `md5` attributes.

    :param file: filepath or filelike object of the UPF potential file to store.
    :raises ParsingError: if no element could be parsed from the UPF file.
    """
    # pylint: disable=redefined-builtin
    from aiida.common.exceptions import ParsingError
    from aiida.common.files import md5_file, md5_from_filelike

    parsed_data = parse_upf(file)

    try:
        md5sum = md5_file(file)
    except TypeError:
        # ``file`` is not a filepath, so it is treated as a filelike object. Parsing may
        # have moved its position, so rewind before hashing to cover the whole content,
        # and rewind again afterwards so the parent call stores it from the start.
        file.seek(0)
        md5sum = md5_from_filelike(file)
        file.seek(0)

    try:
        element = parsed_data['element']
    except KeyError as exception:
        raise ParsingError(
            "No 'element' parsed in the UPF file {}; unable to store".format(self.filename)
        ) from exception

    super(UpfData, self).set_file(file)

    self.set_attribute('element', str(element))
    self.set_attribute('md5', md5sum)
def store(self):
    """Write the in-memory configuration to its file on disk.

    .. note:: if the configuration file already exists on disk and its contents differ from those in memory, a
        backup of the original file on disk will be created before overwriting it.

    :return: self
    """
    import tempfile
    from aiida.common.files import md5_from_filelike, md5_file

    # Nothing to back up when the file is not there yet: write it and stop.
    if not os.path.isfile(self.filepath):
        with io.open(self.filepath, 'wb') as target:
            self._write(target)
        return self

    # The file exists: write the content to a temporary file first and compare the
    # md5 checksums, creating a backup of the file on disk only when they differ.
    with tempfile.NamedTemporaryFile() as scratch:
        self._write(scratch)
        scratch.seek(0)

        new_checksum = md5_from_filelike(scratch)
        current_checksum = md5_file(self.filepath)
        if new_checksum != current_checksum:
            self._backup(self.filepath)

        # Copy while the temporary file is still open, i.e. before it is removed on close.
        shutil.copy(scratch.name, self.filepath)

    return self
def store(self, *args, **kwargs):
    """
    Store the node, reparsing the file so that the md5 and the element
    are correctly reset.

    A node that is already stored is returned as is, without reparsing.

    :return: the node itself if already stored, otherwise the result of the parent ``store``.
    :raises ParsingError: if no element could be parsed from the PSML file.
    """
    from aiida.common.exceptions import ParsingError
    from aiida.common.files import md5_from_filelike

    if self.is_stored:
        return self

    # The parser is handed the name of the opened handle, not the handle itself.
    with self.open(mode='r') as handle:
        parsed_data = parse_psml(handle.name)

    # Open in binary mode which is required for generating the md5 checksum
    with self.open(mode='rb') as handle:
        md5sum = md5_from_filelike(handle)

    try:
        element = parsed_data['element']
    except KeyError as exception:
        raise ParsingError(
            "No 'element' parsed in the PSML file {};"
            " unable to store".format(self.filename)
        ) from exception

    self.set_attribute('element', str(element))
    self.set_attribute('md5', md5sum)

    return super(PsmlData, self).store(*args, **kwargs)
def store(self, *args, **kwargs):
    """Store the node after refreshing the `element` and `md5` attributes from the UPF file.

    A node that is already stored is returned untouched.

    :return: the node itself if already stored, otherwise the result of the parent ``store``.
    :raises ParsingError: if the element could not be parsed from the UPF file.
    """
    # pylint: disable=arguments-differ
    from aiida.common.exceptions import ParsingError
    from aiida.common.files import md5_from_filelike

    if self.is_stored:
        return self

    with self.open(mode='r') as text_handle:
        upf_info = parse_upf(text_handle)

    # The md5 checksum has to be generated from the content opened in binary mode.
    with self.open(mode='rb') as binary_handle:
        checksum = md5_from_filelike(binary_handle)

    try:
        element = upf_info['element']
    except KeyError:
        raise ParsingError('Could not parse the element from the UPF file {}'.format(self.filename))

    self.set_attribute('element', str(element))
    self.set_attribute('md5', checksum)

    return super(UpfData, self).store(*args, **kwargs)
def get_or_create(cls, source: typing.Union[str, pathlib.Path, typing.BinaryIO], filename: str = None):
    """Return the pseudopotential node whose md5 matches ``source``, creating one if none exists.

    Only nodes of exactly this class are considered (subclasses are excluded).

    :param source: the source pseudopotential content, either a binary stream, or a ``str`` or ``Path`` to the path
        of the file on disk, which can be relative or absolute.
    :param filename: optional explicit filename to give to the file stored in the repository.
    :return: instance of ``PseudoPotentialData``, stored if taken from database, unstored otherwise.
    :raises TypeError: if the source is not a ``str``, ``pathlib.Path`` instance or binary stream.
    :raises FileNotFoundError: if the source is a filepath but does not exist.
    """
    source = cls.prepare_source(source)

    query = orm.QueryBuilder()
    md5_filter = {f'attributes.{cls._key_md5}': md5_from_filelike(source)}
    query.append(cls, subclassing=False, filters=md5_filter)

    match = query.first()
    if match:
        return match[0]

    # The checksum computation consumed the stream: rewind it for the constructor.
    source.seek(0)
    return cls(source, filename)
def set_file(self, source: typing.Union[str, pathlib.Path, typing.BinaryIO], filename: str = None, **kwargs):
    """Set the file content and record its md5 checksum.

    .. note:: the ``source`` is first passed through ``prepare_source`` and only the result is handed to the
        superclass. Doing the conversion here, before the superclass invocation, means that a filepath is turned
        into a byte stream only once instead of in every subclass. The superclass call sets the file and filename
        attributes that are expected by other methods. Because the superclass most likely will have read the stream
        to the end, the stream is rewound before it is read again for the checksum.

    :param source: the source pseudopotential content, either a binary stream, or a ``str`` or ``Path`` to the path
        of the file on disk, which can be relative or absolute.
    :param filename: optional explicit filename to give to the file stored in the repository.
    :raises TypeError: if the source is not a ``str``, ``pathlib.Path`` instance or binary stream.
    :raises FileNotFoundError: if the source is a filepath but does not exist.
    """
    stream = self.prepare_source(source)
    super().set_file(stream, filename, **kwargs)

    stream.seek(0)
    checksum = md5_from_filelike(stream)
    self.md5 = checksum
def set_file(self, file, filename=None):
    """Store the file in the repository and parse it to set the `element` and `md5` attributes.

    :param file: filepath or filelike object of the UPF potential file to store.
        Hint: Pass io.BytesIO(b"my string") to construct the file directly from a string.
    :param filename: specify filename to use (defaults to name of provided file).
    :raises ParsingError: if no element could be parsed from the UPF file.
    """
    # pylint: disable=redefined-builtin
    from aiida.common.exceptions import ParsingError
    from aiida.common.files import md5_file, md5_from_filelike

    parsed_data = parse_upf(file)

    try:
        md5sum = md5_file(file)
    except TypeError:
        # ``file`` is not a filepath, so it is treated as a filelike object. Parsing may
        # have moved its position, so rewind before hashing to cover the whole content,
        # and rewind again afterwards so the parent call stores it from the start.
        file.seek(0)
        md5sum = md5_from_filelike(file)
        file.seek(0)

    try:
        element = parsed_data['element']
    except KeyError as exception:
        raise ParsingError(
            "No 'element' parsed in the UPF file {}; unable to store".format(self.filename)
        ) from exception

    super(UpfData, self).set_file(file, filename=filename)

    self.set_attribute('element', str(element))
    self.set_attribute('md5', md5sum)
def generate_md5(self):
    """
    Compute the MD5 hash of the CIF file.

    :return: the value returned by ``md5_from_filelike`` for the file content.
    """
    from aiida.common.files import md5_from_filelike

    # The checksum has to be generated from the content opened in binary mode.
    with self.open(mode='rb') as binary_handle:
        checksum = md5_from_filelike(binary_handle)

    return checksum
def set_file(self, stream: typing.BinaryIO, filename: str = None, **kwargs):
    """Set the file content and record its md5 checksum.

    :param stream: a filelike object with the binary content of the file.
    :param filename: optional explicit filename to give to the file stored in the repository.
    """
    super().set_file(stream, filename, **kwargs)

    # Rewind before hashing, so the checksum is taken from the start of the stream.
    stream.seek(0)
    checksum = md5_from_filelike(stream)
    self.md5 = checksum
def validate_md5(self, md5: str):
    """Check that the given md5 checksum is the one of the currently stored file.

    :param md5: the md5 checksum to validate.
    :raises ValueError: if the md5 does not match that of the currently stored file.
    """
    with self.open(mode='rb') as handle:
        stored_md5 = md5_from_filelike(handle)
        if md5 != stored_md5:
            raise ValueError(f'md5 does not match that of stored file: {md5} != {stored_md5}')
def validate_md5(self, md5: str):
    """
    Validate that the md5 checksum matches that of the currently stored file.

    :param md5: the md5 checksum to validate.
    :raises ValueError: if the md5 does not match that of the currently stored file.
    """
    with self.open(mode='rb') as handle:
        md5_fil = md5_from_filelike(handle)
        if md5 != md5_fil:
            raise ValueError(
                f'The md5 does not match that of stored file: {md5} != {md5_fil}. '
                'The attributes of this class can not be modified manually.'
            )
def get_or_create(cls, source, filename=None):
    """
    Return the IonData matching ``source``, creating a new one if none exists.

    Pass the same parameters of the __init__; if a node of exactly this class with
    the same md5 is found, that IonData is returned, otherwise a new instance of
    this class is created from the source.

    :param source: an absolute path file on disk or a filelike object.
    :param filename: optional explicit filename to give to the file stored in the repository.
        Ignored if a file with the same md5 has been found.
    :return ion: the IonData object.
    :raises TypeError: if ``source`` is a ``str`` or ``pathlib.Path`` that does not point to a file on disk,
        or if it is neither such a path nor a stream of bytes.
    """
    from aiida import orm

    # Path-like sources: check that the file exists, then convert to a stream and rewind it.
    if isinstance(source, (str, pathlib.Path)):
        if not pathlib.Path(source).is_file():
            raise TypeError(
                f'`source` should be a str or pathlib.Path of a filepath on disk, got: {source}'
            )
        source = cls._prepare_source(source)
        source.seek(0)

    # Accept either an ``io.BytesIO`` or any object exposing ``read`` and a ``mode`` containing 'b'.
    readable_bytes = (hasattr(source, 'read') and hasattr(source, 'mode') and 'b' in source.mode)
    bol = isinstance(source, io.BytesIO) or readable_bytes
    if not bol:
        raise TypeError(
            f'`source` should be a str or `pathlib.Path` of a filepath on disk or a stream of bytes, got: {source}'
        )

    # Look for a node of exactly this class (no subclasses) with the same md5 attribute.
    query = orm.QueryBuilder()
    query.append(cls, subclassing=False, filters={'attributes.md5': md5_from_filelike(source)})
    existing = query.first()
    if existing:
        ion = existing[0]
    else:
        # The stream was read to compute the md5, so rewind it for the constructor.
        source.seek(0)
        ion = cls(source, filename)

    return ion
def get_or_create(cls, stream: typing.BinaryIO, filename: str = None):
    """Return the basis node whose md5 matches ``stream``, creating one if none exists.

    Only nodes of exactly this class are considered (subclasses are excluded).

    :param stream: a filelike object with the binary content of the file.
    :param filename: optional explicit filename to give to the file stored in the repository.
    :return: instance of ``BasisData``, stored if taken from database, unstored otherwise.
    """
    query = orm.QueryBuilder()
    md5_filter = {f'attributes.{cls._key_md5}': md5_from_filelike(stream)}
    query.append(cls, subclassing=False, filters=md5_filter)

    match = query.first()
    if match:
        return match[0]

    # The checksum computation consumed the stream: rewind it for the constructor.
    stream.seek(0)
    return cls(stream, filename)
def test_md5():
    """Test the `PseudoPotentialData.md5` property."""
    content = io.BytesIO(b'pseudo')
    expected_md5 = md5_from_filelike(content)
    content.seek(0)

    node = PseudoPotentialData(content)
    node.element = 'Ar'
    assert node.md5 == expected_md5

    # Before storing, a checksum that does not belong to the file is rejected.
    with pytest.raises(ValueError, match=r'md5 does not match that of stored file.*'):
        node.md5 = 'abcdef0123456789'

    node.store()

    # After storing, even the correct checksum can no longer be assigned.
    with pytest.raises(ModificationNotAllowed, match='the attributes of a stored entity are immutable'):
        node.md5 = expected_md5
def _validate(self):
    """Validate the PSML file stored for this node.

    The file is parsed again and its md5 recomputed; both results are compared
    with the `element` and `md5` attributes set on the node.

    :raises ValidationError: if no element can be parsed from the file, if one of the two attributes is not set,
        or if an attribute differs from the value obtained from the file.
    """
    from aiida.common.exceptions import ValidationError
    from aiida.common.files import md5_from_filelike

    super(PsmlData, self)._validate()

    # The parser is handed the name of the opened handle, not the handle itself.
    with self.open(mode='r') as handle:
        parsed_data = parse_psml(handle.name)

    # Open in binary mode which is required for generating the md5 checksum
    with self.open(mode='rb') as handle:
        md5 = md5_from_filelike(handle)

    try:
        element = parsed_data['element']
    except KeyError as exception:
        # Report the node's filename; the name previously used here was never defined,
        # so this path failed with a NameError instead of the intended ValidationError.
        raise ValidationError(
            "No 'element' could be parsed in the PSML "
            "file {}".format(self.filename)
        ) from exception

    try:
        attr_element = self.get_attribute('element')
    except AttributeError as exception:
        raise ValidationError("attribute 'element' not set.") from exception

    try:
        attr_md5 = self.get_attribute('md5')
    except AttributeError as exception:
        raise ValidationError("attribute 'md5' not set.") from exception

    if attr_element != element:
        raise ValidationError(
            "Attribute 'element' says '{}' but '{}' was "
            "parsed instead.".format(attr_element, element)
        )

    if attr_md5 != md5:
        raise ValidationError(
            "Attribute 'md5' says '{}' but '{}' was "
            "parsed instead.".format(attr_md5, md5)
        )
def _validate(self):
    """Check the stored UPF file against the `element` and `md5` attributes of the node.

    :raises ValidationError: if no element can be parsed from the file, if one of the two attributes is not set,
        or if an attribute differs from the value obtained from the file.
    """
    from aiida.common.exceptions import ValidationError
    from aiida.common.files import md5_from_filelike

    super(UpfData, self)._validate()

    with self.open(mode='r') as text_handle:
        upf_info = parse_upf(text_handle)

    # The md5 checksum has to be generated from the content opened in binary mode.
    with self.open(mode='rb') as binary_handle:
        file_md5 = md5_from_filelike(binary_handle)

    try:
        file_element = upf_info['element']
    except KeyError:
        raise ValidationError("No 'element' could be parsed in the UPF {}".format(self.filename))

    # Both attributes must be present before any comparison is made.
    try:
        stored_element = self.get_attribute('element')
    except AttributeError:
        raise ValidationError("attribute 'element' not set.")

    try:
        stored_md5 = self.get_attribute('md5')
    except AttributeError:
        raise ValidationError("attribute 'md5' not set.")

    if stored_element != file_element:
        raise ValidationError(
            "Attribute 'element' says '{}' but '{}' was parsed instead.".format(stored_element, file_element)
        )

    if stored_md5 != file_md5:
        raise ValidationError("Attribute 'md5' says '{}' but '{}' was parsed instead.".format(stored_md5, file_md5))
def test_store():
    """Test the `BasisData.store` method."""
    reference = io.BytesIO(b'basis')
    md5_correct = md5_from_filelike(reference)
    md5_incorrect = 'abcdef0123456789'
    reference.seek(0)

    node = BasisData(io.BytesIO(b'basis'))

    # Storing is refused as long as no element has been defined.
    with pytest.raises(StoringNotAllowed, match='no valid element has been defined.'):
        node.store()

    node.element = 'Ar'

    # Write a wrong checksum directly as attribute: storing has to be refused as well.
    node.set_attribute(BasisData._key_md5, md5_incorrect)  # pylint: disable=protected-access
    with pytest.raises(StoringNotAllowed, match=r'md5 does not match that of stored file:'):
        node.store()

    node.md5 = md5_correct
    stored = node.store()
    assert stored is node
    assert node.is_stored
def test_store():
    """Test the `PseudoPotentialData.store` method."""
    reference = io.BytesIO(b'pseudo')
    md5_correct = md5_from_filelike(reference)
    md5_incorrect = 'abcdef0123456789'
    reference.seek(0)

    node = PseudoPotentialData(io.BytesIO(b'pseudo'))

    # Storing is refused as long as no element has been defined.
    with pytest.raises(StoringNotAllowed, match='no valid element has been defined.'):
        node.store()

    node.element = 'Ar'

    # Write a wrong checksum directly as attribute: storing has to be refused as well.
    node.set_attribute(PseudoPotentialData._key_md5, md5_incorrect)  # pylint: disable=protected-access
    with pytest.raises(StoringNotAllowed, match=r'md5 does not match that of stored file:'):
        node.store()

    node.md5 = md5_correct
    stored = node.store()
    assert stored is node
    assert node.is_stored
def set_file(
    self,
    source: typing.Union[str, pathlib.Path, typing.BinaryIO],
    filename: str = None,
    pair_style: str = None,
    species: list = None,
    atom_style: str = None,
    units: str = None,
    extra_tags: dict = None,
    **kwargs,
):
    """Set the file content.

    .. note:: the ``source`` is first passed through ``prepare_source``, and the result is what gets handed to the
        superclass. Doing the conversion here, before the superclass invocation, means that a filepath is turned
        into a byte stream only once instead of in every subclass. The superclass call sets the file and filename
        attributes that are expected by other methods. Because the superclass most likely will have read the stream
        to the end, the stream is rewound before it is read again for the md5 checksum.

    Any of ``pair_style``, ``species``, ``atom_style``, ``units`` and ``extra_tags`` that is not given falls back
    to the value of the attribute of the same name found on the instance.

    :param source: the source lammps potential content, either a binary stream, or a ``str`` or ``Path`` to the
        path of the file on disk, which can be relative or absolute.
    :type source: typing.Union[str, pathlib.Path, typing.BinaryIO]
    :param filename: optional explicit filename to give to the file stored in the repository.
    :type filename: str
    :param pair_style: Type of potential according to LAMMPS
    :type pair_style: str
    :param species: Species that can be used for this potential.
    :type species: list
    :param atom_style: Type of treatment of the atoms according to LAMMPS.
    :type atom_style: str
    :param units: Default units to be used with this potential.
    :type units: str
    :param extra_tags: Dictionary with extra information to tag the potential, based on the KIM schema.
    :type extra_tags: dict
    :raises TypeError: if the source is not a ``str``, ``pathlib.Path`` instance or binary stream.
    :raises FileNotFoundError: if the source is a filepath but does not exist.
    """
    # pylint: disable=too-many-arguments
    source = self.prepare_source(source)

    # Arguments left as ``None`` take the value found on the instance (which may be ``None`` too).
    if pair_style is None:
        pair_style = self.pair_style
    if species is None:
        species = self.species
    if atom_style is None:
        atom_style = self.atom_style
    if units is None:
        units = self.units
    if extra_tags is None:
        extra_tags = self.extra_tags

    self.validate_pair_style(pair_style=pair_style)
    self.validate_species(species=species)
    self.validate_atom_style(atom_style=atom_style, pair_style=pair_style)
    self.validate_units(units=units, pair_style=pair_style)

    # A missing tag dictionary is treated as an empty one, so the validation and the
    # attribute loop below always run; absent keys are stored as ``None``.
    if extra_tags is None:
        extra_tags = {}
    self.validate_extra_tags(extra_tags=extra_tags)
    for key in self._extra_keys:
        self.set_attribute(key, extra_tags.get(key, None))

    super().set_file(source, filename, **kwargs)

    # Rewind before hashing, so the checksum is taken from the start of the stream.
    source.seek(0)
    self.md5 = md5_from_filelike(source)