コード例 #1
0
 def _retrieve_version(self) -> str:
     """Return the export version stored in the archive's ``metadata.json``.

     :raises CorruptArchive: if ``metadata.json`` cannot be read or does not
         contain an ``export_version`` key.
     """
     try:
         metadata = json.loads(
             read_file_in_tar(self.filepath, 'metadata.json'))
     except (IOError, FileNotFoundError) as error:
         raise CorruptArchive(str(error))
     if 'export_version' not in metadata:
         # Fixed typo in the user-facing message: 'doest not' -> 'does not'.
         raise CorruptArchive(
             "metadata.json does not contain an 'export_version' key")
     return metadata['export_version']
コード例 #2
0
 def metadata(self) -> ArchiveMetadata:
     """Assemble and return the validated :class:`ArchiveMetadata`.

     Required keys are read directly from the raw metadata; optional ones are
     taken from ``export_parameters`` (defaulting to ``None``).

     :raises CorruptArchive: if the metadata does not match the expected schema.
     """
     raw = self._get_metadata()
     export_parameters = raw.get('export_parameters', {})
     kwargs = {
         key: raw[key]
         for key in ('export_version', 'aiida_version', 'all_fields_info',
                     'unique_identifiers')
     }
     for key in ('graph_traversal_rules', 'entities_starting_set',
                 'include_comments', 'include_logs'):
         kwargs[key] = export_parameters.get(key, None)
     kwargs['conversion_info'] = raw.get('conversion_info', [])
     try:
         return ArchiveMetadata(**kwargs)
     except TypeError as error:
         raise CorruptArchive(f'Metadata invalid: {error}')
コード例 #3
0
    def iter_node_repos(
        self,
        uuids: Iterable[str],
        callback: Callable[[str, Any], None] = null_callback,
    ) -> Iterator[Folder]:
        """Yield the repository sandbox folder for each given node UUID.

        :param uuids: UUIDs of the nodes whose repositories to iterate
        :param callback: progress callback, invoked with an ``('init', {...})``
            event and one ``('update', 1)`` event per UUID
        :raises CorruptArchive: if a node's repository folder is missing
        """
        # Materialize `uuids` first: it is typed as an Iterable and may be a
        # one-shot iterator, which the list comprehension below would exhaust,
        # leaving nothing for `zip(uuids, path_prefixes)` further down.
        uuids = list(uuids)
        path_prefixes = [
            os.path.join(self.REPO_FOLDER, export_shard_uuid(uuid))
            for uuid in uuids
        ]

        if not path_prefixes:
            return
        self.assert_within_context()
        assert self._sandbox is not None  # required by mypy

        # unarchive the common folder if it does not exist
        common_prefix = os.path.commonpath(path_prefixes)
        if not self._sandbox.get_subfolder(common_prefix).exists():
            self._extract(path_prefix=common_prefix, callback=callback)

        callback(
            'init', {
                'total': len(path_prefixes),
                'description': 'Iterating node repositories'
            })
        for uuid, path_prefix in zip(uuids, path_prefixes):
            callback('update', 1)
            subfolder = self._sandbox.get_subfolder(path_prefix)
            if not subfolder.exists():
                raise CorruptArchive(
                    f'Unable to find the repository folder for Node with UUID={uuid} in the exported file'
                )
            yield subfolder
コード例 #4
0
 def _extract_archive(self, filepath: Path, callback: Callable[[str, Any],
                                                               None]):
     """Extract the zip archive at ``self.filepath`` into ``filepath``.

     :param filepath: destination folder for the extracted tree
     :param callback: progress callback forwarded to ``extract_tree``
     :raises CorruptArchive: if the file is not a readable zip archive.
     """
     try:
         archive = ZipPath(self.filepath, mode='r', allow_zip64=True)
         archive.extract_tree(filepath, callback=callback)
     except zipfile.BadZipfile as error:
         raise CorruptArchive(f'The input file cannot be read: {error}')
コード例 #5
0
 def _extract(self,
              *,
              path_prefix: str,
              callback: Callable[[str, Any], None] = null_callback):
     """Extract the subtree ``path_prefix`` of the archive into the sandbox.

     :param path_prefix: path inside the archive to extract
     :param callback: progress callback forwarded to ``extract_tree``
     :raises CorruptArchive: if the archive is unreadable or the requested
         folder is not present in it.
     """
     self.assert_within_context()
     assert self._sandbox is not None  # required by mypy
     try:
         source = ZipPath(self.filename, mode='r', allow_zip64=True)
         source.joinpath(path_prefix).extract_tree(
             self._sandbox.abspath,
             callback=callback,
             cb_descript='Extracting repository files')
     except zipfile.BadZipfile as error:
         raise CorruptArchive(f'The input file cannot be read: {error}')
     except NotADirectoryError as error:
         raise CorruptArchive(
             f'Unable to find required folder in archive: {error}')
コード例 #6
0
 def _get_data(self):
     """Lazily load and cache the archive's data JSON read from the tar file.

     :raises CorruptArchive: if the data file cannot be read from the archive.
     """
     if self._data is None:
         try:
             raw = read_file_in_tar(self.filename, self.FILENAME_DATA)
         except (IOError, FileNotFoundError) as error:
             raise CorruptArchive(str(error))
         self._data = json.loads(raw)
     return self._data
コード例 #7
0
 def _get_data(self):
     """Lazily load and cache the archive's data JSON from the unpacked folder.

     :raises CorruptArchive: if the data file is missing from the folder.
     """
     if self._data is None:
         path = Path(self.filename) / self.FILENAME_DATA
         if path.exists():
             self._data = json.loads(path.read_text(encoding='utf8'))
         else:
             raise CorruptArchive(
                 f'required file `{self.FILENAME_DATA}` is not included')
     return self._data
コード例 #8
0
def extract_tar(infile, folder, nodes_export_subfolder=None, **kwargs):
    """
    Extract the nodes to be imported from a (possibly zipped) tar file.

    :param infile: file path
    :type infile: str

    :param folder: a temporary folder used to extract the file tree
    :type folder: :py:class:`~aiida.common.folders.SandboxFolder`

    :param nodes_export_subfolder: name of the subfolder for AiiDA nodes
    :type nodes_export_subfolder: str

    :param silent: suppress progress bar
    :type silent: bool

    :raises TypeError: if parameter types are not respected
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the archive misses files or files have
        incorrect formats
    """
    # pylint: disable=fixme
    # Validate/default the subfolder that holds the node repository files.
    if nodes_export_subfolder:
        if not isinstance(nodes_export_subfolder, str):
            raise TypeError('nodes_export_subfolder must be a string')
    else:
        nodes_export_subfolder = NODES_EXPORT_SUBFOLDER

    try:
        with tarfile.open(infile, 'r:*', format=tarfile.PAX_FORMAT) as handle:

            # A tar containing a single zero-size member is treated as empty.
            if len(handle.getmembers()) == 1 and handle.getmembers()[0].size == 0:
                raise CorruptArchive('no files detected in archive')

            # `get_file_iterator` also extracts the mandatory JSON files
            # (metadata.json / data.json) before returning the member iterator.
            file_iterator = get_file_iterator(file_handle=handle, folderpath=folder.abspath, **kwargs)

            for member in file_iterator:
                if member.isdev():
                    # safety: skip if character device, block device or FIFO
                    print('WARNING, device found inside the import file: {}'.format(member.name), file=sys.stderr)
                    continue
                if member.issym() or member.islnk():
                    # safety: in export, I set dereference=True therefore
                    # there should be no symbolic or hard links.
                    print('WARNING, symlink found inside the import file: {}'.format(member.name), file=sys.stderr)
                    continue
                # Check that we are only exporting nodes within the subfolder!
                # TODO: better check such that there are no .. in the
                # path; use probably the folder limit checks
                if not member.name.startswith(nodes_export_subfolder + os.sep):
                    continue

                update_description(member.name, file_iterator)

                handle.extract(path=folder.abspath, member=member)
    except tarfile.ReadError:
        raise ValueError('The input file format for import is not valid (not a tar file)')
    close_progress_bar(leave=False)
コード例 #9
0
 def _extract_archive(self, filepath: Path, callback: Callable[[str, Any],
                                                               None]):
     """Extract the tar archive at ``self.filepath`` into ``filepath``.

     Device nodes and symlinks are refused during extraction
     (``allow_dev=False`` / ``allow_symlink=False``).

     :param filepath: destination folder for the extracted tree
     :param callback: progress callback forwarded to ``extract_tree``
     :raises CorruptArchive: if the file is not a readable tar archive.
     """
     try:
         source = TarPath(self.filepath, mode='r:*', pax_format=tarfile.PAX_FORMAT)
         source.extract_tree(
             filepath,
             allow_dev=False,
             allow_symlink=False,
             callback=callback)
     except tarfile.ReadError as error:
         raise CorruptArchive(f'The input file cannot be read: {error}')
コード例 #10
0
 def iter_entity_fields(
     self,
     name: str,
     fields: Optional[Tuple[str, ...]] = None
 ) -> Iterator[Tuple[int, Dict[str, Any]]]:
     """Iterate over ``(pk, fields)`` pairs for all entities of type ``name``.

     For nodes, the stored attributes and extras are merged into the yielded
     field dict before any filtering is applied.

     :param name: entity name (must be one of ``self.entity_names``)
     :param fields: if given, restrict the yielded dicts to these keys
     :raises ValueError: if ``name`` is not a known entity name
     :raises CorruptArchive: if a Node lacks attributes or extras entries
     """

     def _restrict(row: Dict[str, Any]) -> Dict[str, Any]:
         # Keep only the requested fields; a no-op when `fields` is None.
         # (Previously this filter was duplicated in both branches below.)
         if fields is None:
             return row
         return {k: v for k, v in row.items() if k in fields}

     if name not in self.entity_names:
         raise ValueError(f'Unknown entity name: {name}')
     data = self._get_data()['export_data'].get(name, {})
     if name == NODE_ENTITY_NAME:
         # here we merge in the attributes and extras before yielding
         attributes = self._get_data().get('node_attributes', {})
         extras = self._get_data().get('node_extras', {})
         for pk, all_fields in data.items():
             if pk not in attributes:
                 raise CorruptArchive(
                     f'Unable to find attributes info for Node with Pk={pk}'
                 )
             if pk not in extras:
                 raise CorruptArchive(
                     f'Unable to find extra info for Node with Pk={pk}')
             merged = {
                 **all_fields,
                 'attributes': attributes[pk],
                 'extras': extras[pk],
             }
             yield int(pk), _restrict(merged)
     else:
         for pk, all_fields in data.items():
             yield int(pk), _restrict(all_fields)
コード例 #11
0
    def unpack(self):
        """Unpack the archive and store the contents in a sandbox.

        Dispatches on the archive type: a plain directory is handed to
        ``extract_tree``, a tar file to ``extract_tar`` and a zip file to
        ``extract_zip``.  Sets ``self._unpacked`` on success.

        :raises CorruptArchive: if the archive format is not recognized
        :raises ContentNotExistent: if the unpacked archive contains no files
        """
        if os.path.isdir(self.filepath):
            extract_tree(self.filepath, self.folder)
        elif tarfile.is_tarfile(self.filepath):
            extract_tar(self.filepath, self.folder, silent=self._silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER)
        elif zipfile.is_zipfile(self.filepath):
            extract_zip(self.filepath, self.folder, silent=self._silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER)
        else:
            raise CorruptArchive('unrecognized archive format')

        # An archive with no content at all is treated as an error.
        if not self.folder.get_content_list():
            raise ContentNotExistent('the provided archive {} is empty'.format(self.filepath))

        self._unpacked = True
コード例 #12
0
def extract_zip(infile, folder, nodes_export_subfolder=None, **kwargs):
    """Extract the nodes to be imported from a zip file.

    :param infile: file path
    :type infile: str

    :param folder: a temporary folder used to extract the file tree
    :type folder: :py:class:`~aiida.common.folders.SandboxFolder`

    :param nodes_export_subfolder: name of the subfolder for AiiDA nodes
    :type nodes_export_subfolder: str

    :param silent: suppress progress bar
    :type silent: bool

    :raises TypeError: if parameter types are not respected
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the archive misses files or files have
        incorrect formats
    """
    # pylint: disable=fixme
    # Validate/default the subfolder that holds the node repository files.
    if nodes_export_subfolder:
        if not isinstance(nodes_export_subfolder, str):
            raise TypeError('nodes_export_subfolder must be a string')
    else:
        nodes_export_subfolder = NODES_EXPORT_SUBFOLDER

    try:
        with zipfile.ZipFile(infile, 'r', allowZip64=True) as handle:

            if not handle.namelist():
                raise CorruptArchive('no files detected in archive')

            # `get_file_iterator` also extracts the mandatory JSON files
            # (metadata.json / data.json) before returning the name iterator.
            file_iterator = get_file_iterator(file_handle=handle, folderpath=folder.abspath, **kwargs)

            for membername in file_iterator:
                # Check that we are only exporting nodes within the subfolder!
                # TODO: better check such that there are no .. in the
                # path; use probably the folder limit checks
                if not membername.startswith(nodes_export_subfolder + os.sep):
                    continue

                update_description(membername, file_iterator)

                handle.extract(path=folder.abspath, member=membername)
    except zipfile.BadZipfile:
        raise ValueError('The input file format for import is not valid (not a zip file)')
    close_progress_bar(leave=False)
コード例 #13
0
def get_file_iterator(file_handle, folderpath, silent=True, **kwargs):  # pylint: disable=unused-argument
    """Extract the required JSON files, then return an iterator over all members.

    :param file_handle: an open archive handle.
    :type file_handle: `tarfile.TarFile`, `zipfile.ZipFile`

    :param folderpath: Path to folder.
    :type folderpath: str

    :param silent: suppress progress bar.
    :type silent: bool

    :return: List of filenames in the archive, wrapped in the `tqdm` progress bar.
    :rtype: `tqdm.tqdm`

    :raises TypeError: if ``file_handle`` is neither a tar nor a zip handle.
    :raises CorruptArchive: if a required JSON file is missing.
    """
    json_files = {'metadata.json', 'data.json'}

    is_tar = isinstance(file_handle, tarfile.TarFile)
    if not is_tar and not isinstance(file_handle, zipfile.ZipFile):
        raise TypeError('Can only handle Tar or Zip files.')

    close_progress_bar(leave=False)
    json_iterator = get_progress_bar(iterable=json_files, leave=False, disable=silent)

    for json_file in json_iterator:
        update_description(json_file, json_iterator)

        try:
            # tar needs a TarInfo member; zip accepts the plain name.
            member = file_handle.getmember(json_file) if is_tar else json_file
            file_handle.extract(path=folderpath, member=member)
        except KeyError:
            raise CorruptArchive('required file `{}` is not included'.format(json_file))

    close_progress_bar(leave=False)
    members = file_handle.getmembers() if is_tar else file_handle.namelist()
    return get_progress_bar(iterable=members, unit='files', leave=False, disable=silent)
コード例 #14
0
def data_migration_legacy_process_attributes(data):
    """Apply migration 0040 - REV. 1.0.40
    Data migration for some legacy process attributes.

    Attribute keys that are renamed:

    * `_sealed` -> `sealed`

    Attribute keys that are removed entirely:

    * `_finished`
    * `_failed`
    * `_aborted`
    * `_do_abort`

    Finally, after these first migrations, any remaining process nodes are screened for the existence of the
    `process_state` attribute. If they have it, it is checked whether the state is active or not, if not, the `sealed`
    attribute is created and set to `True`.

    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if a Node, found to have attributes,
        cannot be found in the list of exported entities.
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the 'sealed' attribute does not exist and
        the ProcessNode is in an active state, i.e. `process_state` is one of ('created', 'running', 'waiting').
        A log-file, listing all illegal ProcessNodes, will be produced in the current directory.
    """
    from aiida.tools.importexport.common.exceptions import CorruptArchive
    from aiida.manage.database.integrity import write_database_integrity_violation

    # `_sealed` is superseded by the `sealed` attribute set below; the other
    # keys are dropped without replacement.
    attrs_to_remove = [
        '_sealed', '_finished', '_failed', '_aborted', '_do_abort'
    ]
    active_states = {'created', 'running', 'waiting'}
    illegal_cases = []  # [UUID-or-PK, process_state] rows for the violation log

    for node_pk, content in data['node_attributes'].items():
        try:
            # Only ProcessNodes (node_type starting with 'process.') are migrated.
            if data['export_data']['Node'][node_pk]['node_type'].startswith(
                    'process.'):
                # Check if the ProcessNode has a 'process_state' attribute, and if it's non-active.
                # Raise if the ProcessNode is in an active state, otherwise set `'sealed' = True`
                process_state = content.get('process_state', '')
                if process_state in active_states:
                    # The ProcessNode is in an active state, and should therefore never have been allowed
                    # to be exported. The Node will be added to a log that is saved in the working directory,
                    # then a CorruptArchive will be raised, since the archive needs to be migrated manually.
                    uuid_pk = data['export_data']['Node'][node_pk].get(
                        'uuid', node_pk)
                    illegal_cases.append([uuid_pk, process_state])
                    continue  # No reason to do more now

                # Either the ProcessNode is in a non-active state or its 'process_state' hasn't been set.
                # In both cases we claim the ProcessNode 'sealed' and make it importable.
                content['sealed'] = True

                # Remove attributes
                for attr in attrs_to_remove:
                    content.pop(attr, None)
        except KeyError as exc:
            # The node has attributes but no matching 'Node' entry in export_data.
            raise CorruptArchive(
                f'Your export archive is corrupt! Org. exception: {exc}')

    if illegal_cases:
        headers = ['UUID/PK', 'process_state']
        warning_message = 'Found ProcessNodes with active process states ' \
                          'that should never have been allowed to be exported.'
        write_database_integrity_violation(illegal_cases, headers,
                                           warning_message)

        raise CorruptArchive(
            'Your export archive is corrupt! '
            'Please see the log-file in your current directory for more details.'
        )
コード例 #15
0
def extract_zip(infile, folder, nodes_export_subfolder=None, silent=False):
    """
    Extract the nodes to be imported from a zip file.

    :param infile: file path
    :type infile: str

    :param folder: a temporary folder used to extract the file tree
    :type folder: :py:class:`~aiida.common.folders.SandboxFolder`

    :param nodes_export_subfolder: name of the subfolder for AiiDA nodes
    :type nodes_export_subfolder: str

    :param silent: suppress debug print
    :type silent: bool

    :raises TypeError: if parameter types are not respected
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the archive misses files or files have
        incorrect formats
    """
    # pylint: disable=fixme
    if not silent:
        print('READING DATA AND METADATA...')

    if not nodes_export_subfolder:
        nodes_export_subfolder = NODES_EXPORT_SUBFOLDER
    elif not isinstance(nodes_export_subfolder, str):
        raise TypeError('nodes_export_subfolder must be a string')

    try:
        with zipfile.ZipFile(infile, 'r', allowZip64=True) as handle:

            if not handle.namelist():
                raise CorruptArchive('no files detected')

            # The two JSON files are mandatory for any export archive.
            for required in ('metadata.json', 'data.json'):
                try:
                    handle.extract(path=folder.abspath, member=required)
                except KeyError:
                    raise CorruptArchive(
                        'required file `{}` is not included'.format(required))

            if not silent:
                print('EXTRACTING NODE DATA...')

            prefix = nodes_export_subfolder + os.sep
            for membername in handle.namelist():
                # Only extract members inside the nodes subfolder.
                # TODO: better check such that there are no .. in the
                # path; use probably the folder limit checks
                if membername.startswith(prefix):
                    handle.extract(path=folder.abspath, member=membername)
    except zipfile.BadZipfile:
        raise ValueError(
            'The input file format for import is not valid (not a zip file)')
コード例 #16
0
def extract_tar(infile, folder, nodes_export_subfolder=None, silent=False):
    """
    Extract the nodes to be imported from a (possibly zipped) tar file.

    :param infile: file path
    :type infile: str

    :param folder: a temporary folder used to extract the file tree
    :type folder: :py:class:`~aiida.common.folders.SandboxFolder`

    :param nodes_export_subfolder: name of the subfolder for AiiDA nodes
    :type nodes_export_subfolder: str

    :param silent: suppress debug print
    :type silent: bool

    :raises TypeError: if parameter types are not respected
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the archive misses files or files have
        incorrect formats
    """
    # pylint: disable=fixme
    if not silent:
        print('READING DATA AND METADATA...')

    if nodes_export_subfolder:
        if not isinstance(nodes_export_subfolder, str):
            raise TypeError('nodes_export_subfolder must be a string')
    else:
        nodes_export_subfolder = NODES_EXPORT_SUBFOLDER

    try:
        with tarfile.open(infile, 'r:*', format=tarfile.PAX_FORMAT) as handle:

            # The two JSON files are mandatory for any export archive.
            for required in ('metadata.json', 'data.json'):
                try:
                    handle.extract(path=folder.abspath,
                                   member=handle.getmember(required))
                except KeyError:
                    raise CorruptArchive(
                        'required file `{}` is not included'.format(required))

            if not silent:
                print('EXTRACTING NODE DATA...')

            for member in handle.getmembers():
                if member.isdev():
                    # safety: skip if character device, block device or FIFO
                    print('WARNING, device found inside the import file: {}'.
                          format(member.name),
                          file=sys.stderr)
                    continue
                if member.issym() or member.islnk():
                    # safety: in export, I set dereference=True therefore
                    # there should be no symbolic or hard links.
                    print('WARNING, link found inside the import file: {}'.
                          format(member.name),
                          file=sys.stderr)
                    continue
                # Check that we are only exporting nodes within the subfolder!
                # TODO: better check such that there are no .. in the
                # path; use probably the folder limit checks
                if not member.name.startswith(nodes_export_subfolder + os.sep):
                    continue
                handle.extract(path=folder.abspath, member=member)
    except tarfile.ReadError:
        # Message made consistent with the zip variant and the other tar
        # extractors in this code base; the old '... not valid (1)' was
        # unhelpful to users.
        raise ValueError('The input file format for import is not valid (not a tar file)')
コード例 #17
0
ファイル: archive.py プロジェクト: zhonger/aiida-core
def extract_tar(infile,
                folder,
                nodes_export_subfolder=None,
                check_files=('data.json', 'metadata.json'),
                **kwargs):
    """
    Extract the nodes to be imported from a (possibly zipped) tar file.

    :param infile: file path
    :type infile: str

    :param folder: a temporary folder used to extract the file tree
    :type folder: :py:class:`~aiida.common.folders.SandboxFolder`

    :param nodes_export_subfolder: name of the subfolder for AiiDA nodes
    :type nodes_export_subfolder: str

    :param check_files: list of files to check are present

    :param silent: suppress progress bar
    :type silent: bool

    :raises TypeError: if parameter types are not respected
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the archive misses files or files have
        incorrect formats
    """
    warnings.warn(
        'extract_tar function is deprecated and will be removed in AiiDA v2.0.0, '
        'use extract_tree in the archive-path package instead',
        AiidaDeprecationWarning)  # pylint: disable=no-member

    if nodes_export_subfolder:
        if not isinstance(nodes_export_subfolder, str):
            raise TypeError('nodes_export_subfolder must be a string')
    else:
        nodes_export_subfolder = NODES_EXPORT_SUBFOLDER

    if not kwargs.get('silent', False):
        set_progress_bar_tqdm(unit='files')
    else:
        set_progress_reporter(None)

    # Names from `check_files` actually seen while walking the archive.
    data_files = set()

    try:
        with tarfile.open(infile, 'r:*', format=tarfile.PAX_FORMAT) as handle:

            members = handle.getmembers()

            # A tar containing a single zero-size member is treated as empty.
            if len(members) == 1 and members[0].size == 0:
                raise CorruptArchive('no files detected in archive')

            with get_progress_reporter()(total=len(members)) as progress:

                for member in members:

                    progress.update()

                    if member.isdev():
                        # safety: skip if character device, block device or FIFO
                        print(
                            f'WARNING, device found inside the import file: {member.name}',
                            file=sys.stderr)
                        continue
                    if member.issym() or member.islnk():
                        # safety: in export, I set dereference=True therefore
                        # there should be no symbolic or hard links.
                        print(
                            f'WARNING, symlink found inside the import file: {member.name}',
                            file=sys.stderr)
                        continue
                    # Check that we are only exporting nodes within the subfolder!
                    # better check such that there are no .. in the
                    # path; use probably the folder limit checks
                    if member.name in check_files:
                        data_files.add(member.name)
                    elif not member.name.startswith(nodes_export_subfolder +
                                                    os.sep):
                        continue

                    _update_description(member.name, progress)

                    handle.extract(path=folder.abspath, member=member)
    except tarfile.ReadError:
        raise ValueError(
            'The input file format for import is not valid (not a tar file)')

    for name in check_files:
        if name not in data_files:
            # Fixed: the message was missing the f-string prefix, so '{name}'
            # was emitted literally, and contained a stray 'f' after the colon.
            raise CorruptArchive(f'Archive missing required file: {name}')
コード例 #18
0
ファイル: archive.py プロジェクト: zhonger/aiida-core
def extract_zip(infile,
                folder,
                nodes_export_subfolder=None,
                check_files=('data.json', 'metadata.json'),
                **kwargs):
    """Extract the nodes to be imported from a zip file.

    :param infile: file path
    :type infile: str

    :param folder: a temporary folder used to extract the file tree
    :type folder: :py:class:`~aiida.common.folders.SandboxFolder`

    :param nodes_export_subfolder: name of the subfolder for AiiDA nodes
    :type nodes_export_subfolder: str

    :param check_files: list of files to check are present

    :param silent: suppress progress bar
    :type silent: bool

    :raises TypeError: if parameter types are not respected
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the archive misses files or files have
        incorrect formats
    """
    warnings.warn(
        'extract_zip function is deprecated and will be removed in AiiDA v2.0.0, '
        'use extract_tree in the archive-path package instead',
        AiidaDeprecationWarning)  # pylint: disable=no-member

    if nodes_export_subfolder:
        if not isinstance(nodes_export_subfolder, str):
            raise TypeError('nodes_export_subfolder must be a string')
    else:
        nodes_export_subfolder = NODES_EXPORT_SUBFOLDER

    if not kwargs.get('silent', False):
        set_progress_bar_tqdm(unit='files')
    else:
        set_progress_reporter(None)

    # Names from `check_files` actually seen while walking the archive.
    data_files = set()

    try:
        with zipfile.ZipFile(infile, 'r', allowZip64=True) as handle:

            members = handle.namelist()

            if not members:
                raise CorruptArchive('no files detected in archive')

            with get_progress_reporter()(total=len(members)) as progress:

                for membername in members:

                    progress.update()

                    # Check that we are only exporting nodes within the subfolder!
                    # better check such that there are no .. in the
                    # path; use probably the folder limit checks
                    if membername in check_files:
                        data_files.add(membername)
                    elif not membername.startswith(nodes_export_subfolder +
                                                   os.sep):
                        continue

                    _update_description(membername, progress)

                    handle.extract(path=folder.abspath, member=membername)

    except zipfile.BadZipfile:
        raise ValueError(
            'The input file format for import is not valid (not a zip file)')

    for name in check_files:
        if name not in data_files:
            # Fixed: the message was missing the f-string prefix, so '{name}'
            # was emitted literally, and contained a stray 'f' after the colon.
            raise CorruptArchive(f'Archive missing required file: {name}')
コード例 #19
0
 def export_version(self) -> str:
     """Return the export version recorded in the archive metadata.

     :raises CorruptArchive: if ``metadata.json`` lacks the key.
     """
     metadata = self._get_metadata()
     if 'export_version' in metadata:
         return metadata['export_version']
     raise CorruptArchive('export_version missing from metadata.json')