Example #1
def _compress_archive_tar(in_path: Path, out_path: Path):
    """Create a new gzip-compressed tarball from a folder."""
    with get_progress_reporter()(total=1,
                                 desc='Compressing to tar') as progress:
        _callback = create_callback(progress)
        with TarPath(out_path, mode='w:gz', dereference=True) as path:
            path.puttree(in_path,
                         check_exists=False,
                         callback=_callback,
                         cb_descript='Compressing to tar')
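
For context, a minimal usage sketch of the helper above, assuming it is called from a module where `get_progress_reporter`, `create_callback` and `TarPath` are already imported; the folder and archive names are hypothetical:

from pathlib import Path

# Hypothetical call: pack the contents of ./my_export into a gzip-compressed tarball.
_compress_archive_tar(Path('my_export'), Path('my_export.tar.gz'))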
Example #2
def _extract_archive(self, filepath: Path,
                     callback: Callable[[str, Any], None]):
    """Extract the archive contents into ``filepath``, raising ``CorruptArchive`` if it cannot be read."""
    try:
        TarPath(self.filepath, mode='r:*',
                pax_format=tarfile.PAX_FORMAT).extract_tree(
                    filepath,
                    allow_dev=False,
                    allow_symlink=False,
                    callback=callback)
    except tarfile.ReadError as error:
        raise CorruptArchive(f'The input file cannot be read: {error}')
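
For experimentation outside the reader class, a standalone sketch of the same `extract_tree` call; the `archive_path` import location and the file names are assumptions:

import tarfile
from pathlib import Path

from archive_path import TarPath  # assumed import location

# Extract the whole archive tree into ./extracted, refusing device files and symlinks.
TarPath(Path('export.tar.gz'), mode='r:*', pax_format=tarfile.PAX_FORMAT).extract_tree(
    Path('extracted'), allow_dev=False, allow_symlink=False)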
Example #3
def open(self):
    # pylint: disable=attribute-defined-outside-init
    self.assert_within_context()
    # create a temporary folder in which to perform the write
    self._temp_path: Path = Path(tempfile.mkdtemp())
    # open a tarfile in write mode to export to
    self._archivepath: TarPath = TarPath(self._temp_path / 'export',
                                         mode='w:gz',
                                         dereference=True)
    # setup data to store
    self._data: Dict[str, Any] = {
        'node_attributes': {},
        'node_extras': {},
        'export_data': {},
        'links_uuid': [],
        'groups_uuid': {},
    }
Example #4
def _extract(self,
             *,
             path_prefix: str,
             callback: Callable[[str, Any], None] = null_callback):
    """Extract the repository files under ``path_prefix`` from the archive into the sandbox folder."""
    self.assert_within_context()
    assert self._sandbox is not None  # required by mypy
    try:
        TarPath(self.filename,
                mode='r:*').joinpath(path_prefix).extract_tree(
                    self._sandbox.abspath,
                    allow_dev=False,
                    allow_symlink=False,
                    callback=callback,
                    cb_descript='Extracting repository files')
    except tarfile.ReadError as error:
        raise CorruptArchive(f'The input file cannot be read: {error}')
    except NotADirectoryError as error:
        raise CorruptArchive(
            f'Unable to find required folder in archive: {error}')
Example #5
def test_glob_all_tar(tmp_path):
    """Test that the `**/*` pattern matches the tar member name list."""
    for name in ("a", "b", "c"):
        tmp_path.joinpath(name).touch()
    tmp_path.joinpath("d").mkdir()
    tmp_path.joinpath("e").joinpath("f").mkdir(parents=True)
    for name in ("x", "y", "z"):
        tmp_path.joinpath("e").joinpath("f").joinpath(name).touch()
    with tarfile.TarFile(tmp_path / "archive.tar", "w") as tar:
        for path in tmp_path.glob("**/*"):
            if path.name in ("archive.tar", "e"):
                continue
            tar.add(
                str(path), path.relative_to(tmp_path).as_posix(), recursive=False
            )
        namelist = sorted(n.rstrip("/") for n in tar.getnames())
    with TarPath(tmp_path / "archive.tar") as tar_path:
        assert (
            sorted(p.at for p in tar_path.glob("**/*", include_virtual=False)) == namelist
        )
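
Outside of the test, the same glob pattern can be used to list the members of an existing tarball; the import location and archive name are assumptions:

from pathlib import Path

from archive_path import TarPath  # assumed import location

# Print every non-virtual member path recorded in the archive.
with TarPath(Path('archive.tar')) as tar_path:
    for member in tar_path.glob('**/*', include_virtual=False):
        print(member.at)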
Example #6
    def _migrate(filename_archive, version_old, version_new, migration_method, archive_kwargs=None):
        """Migrate one of the archives from `aiida-export-migration-tests`.

        :param filename_archive: the relative file name of the archive
        :param version_old: version of the archive
        :param version_new: version to migrate to
        :param migration_method: the migration method that should convert between version_old and version_new
        :return: the migrated metadata and data as a tuple

        """
        archive_path = get_archive_file(
            filename_archive,
            **(archive_kwargs or {
                'filepath': 'archives',
                'external_module': 'aiida-export-migration-tests'
            })
        )
        out_path = tmp_path / 'out.aiida'

        if zipfile.is_zipfile(archive_path):
            ZipPath(archive_path).extract_tree(out_path)
        elif tarfile.is_tarfile(archive_path):
            TarPath(archive_path).extract_tree(out_path)
        else:
            raise ValueError('invalid file format, expected either a zip archive or gzipped tarball')

        folder = CacheFolder(out_path)
        _, old_metadata = folder.load_json('metadata.json')
        verify_metadata_version(old_metadata, version=version_old)

        migration_method(folder)

        _, metadata = folder.load_json('metadata.json')
        verify_metadata_version(metadata, version=version_new)

        _, data = folder.load_json('data.json')

        return metadata, data
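
A hypothetical invocation of this fixture helper, assuming the calling test supplies the external v0.3 archive and the `migrate_v3_to_v4` method used elsewhere in this listing:

# Migrate the external v0.3 archive to v0.4 and get back the migrated metadata and data.
metadata, data = _migrate('export_v0.3.aiida', '0.3', '0.4', migrate_v3_to_v4)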
Example #7
def test_illegal_create_links(external_archive, tmp_path):
    """Test illegal create links from workchain are detected and removed from exports using v0.3"""
    # Initialization
    dirpath_archive = get_archive_file('export_v0.3.aiida', **external_archive)
    known_illegal_links = 2

    out_path = tmp_path / 'aiida.out'

    # Migrate
    if zipfile.is_zipfile(dirpath_archive):
        ZipPath(dirpath_archive).extract_tree(out_path)
    elif tarfile.is_tarfile(dirpath_archive):
        TarPath(dirpath_archive).extract_tree(out_path)
    else:
        raise ValueError(
            'invalid file format, expected either a zip archive or gzipped tarball'
        )

    try:
        data = json.loads((out_path / 'data.json').read_text('utf8'))
    except IOError:
        raise NotExistent(
            f"export archive does not contain the required file {out_path / 'data.json'}")

    # Check that the illegal create links are present in the original archive file
    links_count = len(data['links_uuid'])
    links_count_migrated = links_count - known_illegal_links

    workfunc_uuids = {
        value['uuid']
        for value in data['export_data']['Node'].values()
        if value['type'].startswith('calculation.function')
        or value['type'].startswith('calculation.work')
    }
    violations = []
    for link in data['links_uuid']:
        if link['input'] in workfunc_uuids and link['type'] == 'createlink':
            violations.append(link)
    assert len(violations) == known_illegal_links, (
        f'{known_illegal_links} illegal create links were expected, instead {len(violations)} were found'
    )

    # Migrate to v0.4
    folder = CacheFolder(out_path)
    migrate_v3_to_v4(folder)

    _, data = folder.load_json('data.json')

    # Check illegal create links were removed
    assert len(data['links_uuid']) == links_count_migrated, (
        f"{links_count_migrated} links were expected, instead {len(data['links_uuid'])} were found"
    )

    workfunc_uuids = {
        value['uuid']
        for value in data['export_data']['Node'].values()
        if value['node_type'].find('WorkFunctionNode') != -1
        or value['node_type'].find('WorkChainNode') != -1
    }
    violations = []
    for link in data['links_uuid']:
        if link['input'] in workfunc_uuids and link['type'] == 'create':
            violations.append(link)
    assert len(violations) == 0, (
        f'0 illegal links were expected, instead {len(violations)} were found')
Example #8
def test_migrate_external(external_archive, tmp_path):
    """Test migration for file containing complete v0.3 era possibilities"""

    # Get metadata.json and data.json as dicts from v0.3 file archive
    dirpath_archive = get_archive_file('export_v0.3.aiida', **external_archive)

    out_path = tmp_path / 'aiida.out'

    # Migrate
    if zipfile.is_zipfile(dirpath_archive):
        ZipPath(dirpath_archive).extract_tree(out_path)
    elif tarfile.is_tarfile(dirpath_archive):
        TarPath(dirpath_archive).extract_tree(out_path)
    else:
        raise ValueError(
            'invalid file format, expected either a zip archive or gzipped tarball'
        )

    try:
        metadata = json.loads((out_path / 'metadata.json').read_text('utf8'))
        data = json.loads((out_path / 'data.json').read_text('utf8'))
    except IOError:
        raise NotExistent(
            f'export archive does not contain the required metadata.json and data.json files in {out_path}')

    verify_metadata_version(metadata, version='0.3')

    # Save pre-migration info
    links_count_org = len(data['links_uuid'])
    work_uuids = {
        value['uuid']
        for value in data['export_data']['Node'].values()
        if value['type'].startswith('calculation.function')
        or value['type'].startswith('calculation.work')
    }
    illegal_links = []
    for link in data['links_uuid']:
        if link['input'] in work_uuids and link['type'] == 'createlink':
            illegal_links.append(link)

    # Migrate to v0.4
    folder = CacheFolder(out_path)
    migrate_v3_to_v4(folder)
    _, metadata = folder.load_json('metadata.json')
    _, data = folder.load_json('data.json')
    verify_metadata_version(metadata, version='0.4')

    ## The following checks are based on this particular archive file,
    ## which means other entities may be legal; they are simply not relevant here.

    # Check schema-changes
    new_node_attrs = {'node_type', 'process_type'}
    for change in new_node_attrs:
        # data.json
        for node in data['export_data']['Node'].values():
            assert change in node, f"'{change}' not found for {node}"
        # metadata.json
        assert change in metadata['all_fields_info'][
            'Node'], f"'{change}' not found in metadata.json for Node"

    # Check Node types
    legal_node_types = {
        'data.float.Float.', 'data.int.Int.', 'data.dict.Dict.',
        'data.code.Code.', 'data.structure.StructureData.',
        'data.folder.FolderData.', 'data.remote.RemoteData.',
        'data.upf.UpfData.', 'data.array.ArrayData.',
        'data.array.bands.BandsData.', 'data.array.kpoints.KpointsData.',
        'data.array.trajectory.TrajectoryData.',
        'process.workflow.workchain.WorkChainNode.',
        'process.calculation.calcjob.CalcJobNode.'
    }
    legal_process_types = {'', 'aiida.calculations:quantumespresso.pw'}
    for node in data['export_data']['Node'].values():
        assert node['node_type'] in legal_node_types, (
            f"{node['node_type']} is not a legal node_type. Legal node types: {legal_node_types}"
        )
        assert node['process_type'] in legal_process_types, (
            f"{node['process_type']} is not a legal process_type. Legal process types: {legal_process_types}"
        )

    # Check links
    # Make sure the two illegal create links were removed during the migration
    assert len(data['links_uuid']) == links_count_org - 2, (
        f'Two of the original {links_count_org} links should have been removed during the migration, '
        f"instead there are now {len(data['links_uuid'])} links"
    )
    legal_link_types = {
        'unspecified', 'create', 'return', 'input_calc', 'input_work',
        'call_calc', 'call_work'
    }
    for link in data['links_uuid']:
        assert link['type'] in legal_link_types
    for link in illegal_links:
        assert link not in data[
            'links_uuid'], f'{link} should not be in the migrated archive file'

    # Check Groups
    # There is one Group in the archive file, it is a user group
    updated_attrs = {'label', 'type_string'}
    legal_group_type = {'user'}
    for attr in updated_attrs:
        # data.json
        for group in data['export_data']['Group'].values():
            assert attr in group, f'{attr} not found in Group {group}'
            assert group[
                'type_string'] in legal_group_type, f"{group['type_string']} is not a legal Group type_string"

        # metadata.json
        assert attr in metadata['all_fields_info'][
            'Group'], f'{attr} not found in metadata.json'

    # Check node_attributes*
    calcjob_nodes = []
    process_nodes = []
    for node_id, content in data['export_data']['Node'].items():
        if content['node_type'] == 'process.calculation.calcjob.CalcJobNode.':
            calcjob_nodes.append(node_id)
        elif content['node_type'].startswith('process.'):
            process_nodes.append(node_id)

    mandatory_updated_calcjob_attrs = {'resources', 'parser_name'}
    optional_updated_calcjob_attrs = {
        'custom_environment_variables': 'environment_variables'
    }
    updated_process_attrs = {'process_label'}
    fields = {'node_attributes', 'node_attributes_conversion'}
    for field in fields:
        for node_id in calcjob_nodes:
            for attr in mandatory_updated_calcjob_attrs:
                assert attr in data[field][node_id], (
                    f"Updated attribute name '{attr}' not found in {field} for node_id: {node_id}"
                )
            for old, new in optional_updated_calcjob_attrs.items():
                assert old not in data[field][node_id], (
                    "Old attribute '{}' found in {} for node_id: {}. "
                    "It should now be updated to '{}' or not exist".format(
                        old, field, node_id, new))
        for node_id in process_nodes:
            for attr in updated_process_attrs:
                assert attr in data[field][node_id], (
                    f"Updated attribute name '{attr}' not found in {field} for node_id: {node_id}"
                )

    # Check TrajectoryData
    # There should be at least one TrajectoryData node in the archive file
    trajectorydata_nodes = []
    for node_id, content in data['export_data']['Node'].items():
        if content['node_type'] == 'data.array.trajectory.TrajectoryData.':
            trajectorydata_nodes.append(node_id)

    updated_attrs = {'symbols'}
    fields = {'node_attributes', 'node_attributes_conversion'}
    for field in fields:
        for node_id in trajectorydata_nodes:
            for attr in updated_attrs:
                assert attr in data[field][node_id], (
                    f"Updated attribute name '{attr}' not found in {field} for TrajecteoryData node_id: {node_id}"
                )

    # Check Computer
    removed_attrs = {'enabled'}
    for attr in removed_attrs:
        # data.json
        for computer in data['export_data']['Computer'].values():
            assert attr not in computer, f"'{attr}' should have been removed from Computer {computer['name']}"

        # metadata.json
        assert attr not in metadata['all_fields_info']['Computer'], (
            f"'{attr}' should have been removed from Computer in metadata.json"
        )

    # Check new entities
    new_entities = {'Log', 'Comment'}
    fields = {'all_fields_info', 'unique_identifiers'}
    for entity in new_entities:
        for field in fields:
            assert entity in metadata[
                field], f'{entity} not found in {field} in metadata.json'

    # Check extras
    # Dicts with keys equal to node_id and values equal to {} should be present.
    # This means they should have the same length as data['export_data']['Node'] or 'node_attributes*'.
    attrs_count = len(data['node_attributes'])
    new_fields = {'node_extras', 'node_extras_conversion'}
    for field in new_fields:
        assert field in data, f"New field '{field}' not found in data.json"
        assert len(data[field]) == attrs_count, (
            f"New field '{field}' found to have only {len(data[field])} entries, "
            f'but should have had {attrs_count} entries')