Example #1
def test_migrations(migration_data, tmp_path):
    """Test each migration method from the `aiida.tools.importexport.archive.migrations` module."""
    version_old, (version_new, migration_method) = migration_data

    filepath_archive_new = get_archive_file(f'export_v{version_new}_simple.aiida', filepath='export/migrate')

    metadata_new = read_json_files(filepath_archive_new, names=['metadata.json'])[0]
    verify_metadata_version(metadata_new, version=version_new)
    data_new = read_json_files(filepath_archive_new, names=['data.json'])[0]

    filepath_archive_old = get_archive_file(f'export_v{version_old}_simple.aiida', filepath='export/migrate')

    out_path = tmp_path / 'out.aiida'
    with zipfile.ZipFile(filepath_archive_old, 'r', allowZip64=True) as handle:
        handle.extractall(out_path)

    folder = CacheFolder(out_path)
    migration_method(folder)

    _, metadata_old = folder.load_json('metadata.json')
    _, data_old = folder.load_json('data.json')

    verify_metadata_version(metadata_old, version=version_new)

    # Remove AiiDA version, since this may change regardless of the migration function
    metadata_old.pop('aiida_version')
    metadata_new.pop('aiida_version')

    # Assert conversion message in `metadata.json` is correct and then remove it for later assertions
    metadata_new.pop('conversion_info')
    message = f'Converted from version {version_old} to {version_new} with AiiDA v{get_version()}'
    assert metadata_old.pop('conversion_info')[-1] == message, 'Conversion message after migration is wrong'

    assert metadata_old == metadata_new
    assert data_old == data_new
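
The `migration_data` fixture itself is not shown above. A minimal sketch of how it could be parametrised, assuming `MIGRATE_FUNCTIONS` maps each old version to a `(new_version, migration_method)` tuple, as the migrator in Example #4 below relies on (import path assumed):

import pytest

# Import path assumed; MIGRATE_FUNCTIONS would map e.g. '0.1' -> ('0.2', migrate_v1_to_v2).
from aiida.tools.importexport.archive.migrations import MIGRATE_FUNCTIONS


@pytest.fixture(params=list(MIGRATE_FUNCTIONS.items()), ids=lambda item: item[0])
def migration_data(request):
    """Yield one (old_version, (new_version, migration_method)) pair per registered migration."""
    return request.param
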
Example #2
def migration_trajectory_symbols_to_attribute(data: dict, folder: CacheFolder):
    """Apply migrations: 0026 - REV. 1.0.26 and 0027 - REV. 1.0.27
    Create the symbols attribute from the repository array for all `TrajectoryData` nodes.
    """
    import os  # `os` and `numpy` are module-level imports in the original source; added here to keep the snippet self-contained
    import numpy as np

    from aiida.tools.importexport.common.config import NODES_EXPORT_SUBFOLDER

    path = folder.get_path(flush=False)

    for node_id, content in data['export_data'].get('Node', {}).items():
        if content.get('type', '') == 'node.data.array.trajectory.TrajectoryData.':
            uuid = content['uuid']
            symbols_path = path.joinpath(
                NODES_EXPORT_SUBFOLDER, uuid[0:2], uuid[2:4], uuid[4:], 'path', 'symbols.npy'
            )
            symbols = np.load(os.path.abspath(symbols_path)).tolist()
            symbols_path.unlink()
            # Update 'node_attributes'
            data['node_attributes'][node_id].pop('array|symbols', None)
            data['node_attributes'][node_id]['symbols'] = symbols
            # Update 'node_attributes_conversion'
            data['node_attributes_conversion'][node_id].pop('array|symbols', None)
            data['node_attributes_conversion'][node_id]['symbols'] = [None] * len(symbols)
Example #3
def migrate_v9_to_v10(folder: CacheFolder):
    """Migration of archive files from v0.9 to v0.10."""
    old_version = '0.9'
    new_version = '0.10'

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    metadata['all_fields_info']['Node']['attributes'] = {
        'convert_type': 'jsonb'
    }
    metadata['all_fields_info']['Node']['extras'] = {'convert_type': 'jsonb'}
    metadata['all_fields_info']['Group']['extras'] = {'convert_type': 'jsonb'}

    folder.write_json('metadata.json', metadata)
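
Every migration follows the same pattern: `verify_metadata_version` checks the declared version and `update_metadata` bumps it before the file is written back. A hedged sketch of what these two helpers could look like (not the actual implementation), consistent with the conversion message asserted in Example #1:

from aiida import get_version  # AiiDA exposes its version string at the package level


def verify_metadata_version(metadata: dict, version: str):
    """Raise if the archive does not declare the expected export version (sketch)."""
    if metadata.get('export_version') != version:
        raise ValueError(f"expected export version {version}, found {metadata.get('export_version')}")


def update_metadata(metadata: dict, version: str):
    """Bump the export version and record the conversion step (sketch)."""
    old_version = metadata['export_version']
    metadata['export_version'] = version
    metadata['aiida_version'] = get_version()
    metadata.setdefault('conversion_info', []).append(
        f'Converted from version {old_version} to {version} with AiiDA v{get_version()}'
    )
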
Example #4
    def _perform_migration(self, work_dir: Path, pathway: List[str],
                           out_compression: str,
                           out_path: Optional[Union[str, Path]]) -> Path:
        """Perform the migration(s) in the work directory, compress (if necessary),
        then move to the out_path (if not None).
        """
        MIGRATE_LOGGER.info('Extracting archive to work directory')

        extracted = Path(work_dir) / 'extracted'
        extracted.mkdir(parents=True)

        with get_progress_reporter()(total=1) as progress:
            callback = create_callback(progress)
            self._extract_archive(extracted, callback)

        with CacheFolder(extracted) as folder:
            with get_progress_reporter()(
                    total=len(pathway),
                    desc='Performing migrations: ') as progress:
                for from_version in pathway:
                    to_version = MIGRATE_FUNCTIONS[from_version][0]
                    progress.set_description_str(
                        f'Performing migrations: {from_version} -> {to_version}',
                        refresh=True)
                    try:
                        MIGRATE_FUNCTIONS[from_version][1](folder)
                    except DanglingLinkError:
                        raise ArchiveMigrationError(
                            'Archive file is invalid because it contains dangling links'
                        )
                    progress.update()
            MIGRATE_LOGGER.debug('Flushing cache')

        # re-compress archive
        if out_compression != 'none':
            MIGRATE_LOGGER.info(
                f"Re-compressing archive as '{out_compression}'")
            migrated = work_dir / 'compressed'
        else:
            migrated = extracted

        if out_compression == 'zip':
            self._compress_archive_zip(extracted, migrated,
                                       zipfile.ZIP_DEFLATED)
        elif out_compression == 'zip-uncompressed':
            self._compress_archive_zip(extracted, migrated, zipfile.ZIP_STORED)
        elif out_compression == 'tar.gz':
            self._compress_archive_tar(extracted, migrated)

        if out_path is not None:
            # move to final location
            MIGRATE_LOGGER.info('Moving archive to: %s', out_path)
            self._move_file(migrated, Path(out_path))

        return Path(out_path) if out_path else migrated
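
The `pathway` argument is the ordered list of source versions to migrate through. A minimal sketch, assuming `MIGRATE_FUNCTIONS` maps each version to the `(next_version, migration_function)` pair used above, of how such a pathway could be assembled (`build_pathway` is a hypothetical helper, not part of the migrator):

from typing import List

# MIGRATE_FUNCTIONS and ArchiveMigrationError are assumed to be in scope, as in the method above.
def build_pathway(current_version: str, target_version: str) -> List[str]:
    """Collect the chain of versions to migrate through, stopping at the target (sketch)."""
    pathway = []
    while current_version != target_version:
        if current_version not in MIGRATE_FUNCTIONS:
            raise ArchiveMigrationError(f'no migration registered for archive version {current_version}')
        pathway.append(current_version)
        current_version = MIGRATE_FUNCTIONS[current_version][0]  # version this migration produces
    return pathway
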
Example #5
def migrate_v7_to_v8(folder: CacheFolder):
    """Migration of archive files from v0.7 to v0.8."""
    old_version = '0.7'
    new_version = '0.8'

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    _, data = folder.load_json('data.json')

    # Apply migrations
    migration_default_link_label(data)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
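
Usage sketch for the single-step migrations such as `migrate_v7_to_v8`: they only need a `CacheFolder` wrapping a directory that holds the extracted `metadata.json` and `data.json` (the import path and directory below are assumptions):

from pathlib import Path

# Import path assumed; CacheFolder caches the JSON files in memory and flushes them on exit.
from aiida.tools.importexport.archive.common import CacheFolder

extracted = Path('/tmp/extracted_archive')  # directory containing metadata.json and data.json

with CacheFolder(extracted) as folder:
    migrate_v7_to_v8(folder)  # bumps the export version to 0.8 and rewrites data.json in place
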
Example #6
def migrate_v8_to_v9(folder: CacheFolder):
    """Migration of archive files from v0.8 to v0.9."""
    old_version = '0.8'
    new_version = '0.9'

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    _, data = folder.load_json('data.json')

    # Apply migrations
    migration_dbgroup_type_string(data)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
Example #7
def migrate_v6_to_v7(folder: CacheFolder):
    """Migration of archive files from v0.6 to v0.7"""
    old_version = '0.6'
    new_version = '0.7'

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    _, data = folder.load_json('data.json')

    # Apply migrations
    data_migration_legacy_process_attributes(data)
    remove_attribute_link_metadata(metadata)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
Example #8
def migrate_v5_to_v6(folder: CacheFolder):
    """Migration of archive files from v0.5 to v0.6"""
    old_version = '0.5'
    new_version = '0.6'

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    _, data = folder.load_json('data.json')

    # Apply migrations
    migration_serialize_datetime_objects(data)
    migration_migrate_legacy_job_calculation_data(data)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
Example #9
def migrate_v4_to_v5(folder: CacheFolder):
    """
    Migration of archive files from v0.4 to v0.5

    This is from migration 0034 (drop_node_columns_nodeversion_public) and onwards
    """
    old_version = '0.4'
    new_version = '0.5'

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    _, data = folder.load_json('data.json')
    # Apply migrations
    migration_drop_node_columns_nodeversion_public(metadata, data)
    migration_drop_computer_transport_params(metadata, data)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
Example #10
def migrate_v1_to_v2(folder: CacheFolder):
    """
    Migration of archive files from v0.1 to v0.2, which means generalizing the
    field names with respect to the database backend

    :param metadata: the content of an export archive metadata.json file
    :param data: the content of an export archive data.json file
    """
    old_version = '0.1'
    new_version = '0.2'

    old_start = 'aiida.djsite'
    new_start = 'aiida.backends.djsite'

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    _, data = folder.load_json('data.json')

    for field in ['export_data']:
        for key in list(data[field]):
            if key.startswith(old_start):
                new_key = get_new_string(key, old_start, new_start)
                data[field][new_key] = data[field][key]
                del data[field][key]

    for field in ['unique_identifiers', 'all_fields_info']:
        for key in list(metadata[field].keys()):
            if key.startswith(old_start):
                new_key = get_new_string(key, old_start, new_start)
                metadata[field][new_key] = metadata[field][key]
                del metadata[field][key]

    metadata['all_fields_info'] = replace_requires(metadata['all_fields_info'], old_start, new_start)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
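
`get_new_string` and `replace_requires` are shared helpers whose definitions are not shown here. Hedged sketches of their behaviour, inferred from how they are called above (the real helpers live alongside these migration functions and may differ in detail):

def get_new_string(old_string: str, old_start: str, new_start: str) -> str:
    """Swap the module prefix of a key, keeping the remainder intact (sketch)."""
    if old_string.startswith(old_start):
        return f'{new_start}{old_string[len(old_start):]}'
    return old_string


def replace_requires(all_fields_info: dict, old_start: str, new_start: str) -> dict:
    """Rewrite nested 'requires' values that still carry the old module prefix (sketch)."""
    for fields in all_fields_info.values():
        for prop in fields.values():
            if isinstance(prop, dict) and str(prop.get('requires', '')).startswith(old_start):
                prop['requires'] = get_new_string(prop['requires'], old_start, new_start)
    return all_fields_info
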
Example #11
def migrate_v3_to_v4(folder: CacheFolder):
    """
    Migration of archive files from v0.3 to v0.4

    Note concerning migration 0032 - REV. 1.0.32:
    Remove legacy workflow tables: DbWorkflow, DbWorkflowData, DbWorkflowStep
    These were (according to Antimo Marrazzo) never exported.
    """
    old_version = '0.3'
    new_version = '0.4'

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    _, data = folder.load_json('data.json')

    # Apply migrations in correct sequential order
    migration_base_data_plugin_type_string(data)
    migration_process_type(metadata, data)
    migration_code_sub_class_of_data(data)
    migration_add_node_uuid_unique_constraint(data)
    migration_migrate_builtin_calculations(data)
    migration_provenance_redesign(data)
    migration_dbgroup_name_to_label_type_to_type_string(metadata, data)
    migration_dbgroup_type_string_change_content(data)
    migration_calc_job_option_attribute_keys(data)
    migration_move_data_within_node_module(data)
    migration_trajectory_symbols_to_attribute(data, folder)
    migration_remove_node_prefix(data)
    migration_rename_parameter_data_to_dict(data)
    migration_dbnode_type_to_dbnode_node_type(metadata, data)
    migration_remove_dbcomputer_enabled(metadata, data)
    migration_replace_text_field_with_json_field(data)

    # Add Node Extras
    add_extras(data)

    # Update metadata.json with the new Log and Comment entities
    new_entities = {
        'Log': {
            'uuid': {},
            'time': {
                'convert_type': 'date'
            },
            'loggername': {},
            'levelname': {},
            'message': {},
            'metadata': {},
            'dbnode': {
                'related_name': 'dblogs',
                'requires': 'Node'
            }
        },
        'Comment': {
            'uuid': {},
            'ctime': {
                'convert_type': 'date'
            },
            'mtime': {
                'convert_type': 'date'
            },
            'content': {},
            'dbnode': {
                'related_name': 'dbcomments',
                'requires': 'Node'
            },
            'user': {
                'related_name': 'dbcomments',
                'requires': 'User'
            }
        }
    }
    metadata['all_fields_info'].update(new_entities)
    metadata['unique_identifiers'].update({'Log': 'uuid', 'Comment': 'uuid'})

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
Example #12
def migrate_v2_to_v3(folder: CacheFolder):
    """
    Migration of archive files from v0.2 to v0.3, which means adding the link
    types to the link entries and making the entity key names backend agnostic
    by effectively removing the prefix 'aiida.backends.djsite.db.models'

    :param data: the content of an export archive data.json file
    :param metadata: the content of an export archive metadata.json file
    """

    old_version = '0.2'
    new_version = '0.3'

    class LinkType(enum.Enum):
        """This was the state of the `aiida.common.links.LinkType` enum before aiida-core v1.0.0a5"""

        UNSPECIFIED = 'unspecified'
        CREATE = 'createlink'
        RETURN = 'returnlink'
        INPUT = 'inputlink'
        CALL = 'calllink'

    class NodeType(enum.Enum):
        """A simple enum of relevant node types"""

        NONE = 'none'
        CALC = 'calculation'
        CODE = 'code'
        DATA = 'data'
        WORK = 'work'

    entity_map = {
        'aiida.backends.djsite.db.models.DbNode': 'Node',
        'aiida.backends.djsite.db.models.DbLink': 'Link',
        'aiida.backends.djsite.db.models.DbGroup': 'Group',
        'aiida.backends.djsite.db.models.DbComputer': 'Computer',
        'aiida.backends.djsite.db.models.DbUser': 'User',
        'aiida.backends.djsite.db.models.DbAttribute': 'Attribute'
    }

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    _, data = folder.load_json('data.json')

    # Create a mapping from node uuid to node type
    mapping = {}
    for nodes in data['export_data'].values():
        for node in nodes.values():

            try:
                node_uuid = node['uuid']
                node_type_string = node['type']
            except KeyError:
                continue

            if node_type_string.startswith('calculation.job.'):
                node_type = NodeType.CALC
            elif node_type_string.startswith('calculation.inline.'):
                node_type = NodeType.CALC
            elif node_type_string.startswith('code.Code'):
                node_type = NodeType.CODE
            elif node_type_string.startswith('data.'):
                node_type = NodeType.DATA
            elif node_type_string.startswith('calculation.work.'):
                node_type = NodeType.WORK
            else:
                node_type = NodeType.NONE

            mapping[node_uuid] = node_type

    # For each link, deduce the link type and insert it in place
    for link in data['links_uuid']:

        try:
            input_type = NodeType(mapping[link['input']])
            output_type = NodeType(mapping[link['output']])
        except KeyError:
            raise DanglingLinkError(f"Unknown node UUID {link['input']} or {link['output']}")

        # The following table demonstrates the logic for inferring the link type
        # (CODE, DATA) -> (WORK, CALC) : INPUT
        # (CALC)       -> (DATA)       : CREATE
        # (WORK)       -> (DATA)       : RETURN
        # (WORK)       -> (CALC, WORK) : CALL
        if input_type in [NodeType.CODE, NodeType.DATA] and output_type in [NodeType.CALC, NodeType.WORK]:
            link['type'] = LinkType.INPUT.value
        elif input_type == NodeType.CALC and output_type == NodeType.DATA:
            link['type'] = LinkType.CREATE.value
        elif input_type == NodeType.WORK and output_type == NodeType.DATA:
            link['type'] = LinkType.RETURN.value
        elif input_type == NodeType.WORK and output_type in [NodeType.CALC, NodeType.WORK]:
            link['type'] = LinkType.CALL.value
        else:
            link['type'] = LinkType.UNSPECIFIED.value

    # Now we migrate the entity key names i.e. removing the 'aiida.backends.djsite.db.models' prefix
    for field in ['unique_identifiers', 'all_fields_info']:
        for old_key, new_key in entity_map.items():
            if old_key in metadata[field]:
                metadata[field][new_key] = metadata[field][old_key]
                del metadata[field][old_key]

    # Replace the 'requires' keys in the nested dictionaries in 'all_fields_info'
    for entity in metadata['all_fields_info'].values():
        for prop in entity.values():
            for key, value in prop.items():
                if key == 'requires' and value in entity_map:
                    prop[key] = entity_map[value]

    # Replace any present keys in the data.json
    for field in ['export_data']:
        for old_key, new_key in entity_map.items():
            if old_key in data[field]:
                data[field][new_key] = data[field][old_key]
                del data[field][old_key]

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
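
To see the link-type inference in isolation, the decision table above can be factored into a small standalone helper (a refactoring for illustration only, not how the migration is organised); the two checks at the end mirror the first two rows of the table:

import enum


class LinkType(enum.Enum):  # copies of the local enums defined inside migrate_v2_to_v3
    UNSPECIFIED = 'unspecified'
    CREATE = 'createlink'
    RETURN = 'returnlink'
    INPUT = 'inputlink'
    CALL = 'calllink'


class NodeType(enum.Enum):
    NONE = 'none'
    CALC = 'calculation'
    CODE = 'code'
    DATA = 'data'
    WORK = 'work'


def infer_link_type(input_type: NodeType, output_type: NodeType) -> str:
    """Return the link type string for one link, following the table above (sketch)."""
    if input_type in (NodeType.CODE, NodeType.DATA) and output_type in (NodeType.CALC, NodeType.WORK):
        return LinkType.INPUT.value
    if input_type == NodeType.CALC and output_type == NodeType.DATA:
        return LinkType.CREATE.value
    if input_type == NodeType.WORK and output_type == NodeType.DATA:
        return LinkType.RETURN.value
    if input_type == NodeType.WORK and output_type in (NodeType.CALC, NodeType.WORK):
        return LinkType.CALL.value
    return LinkType.UNSPECIFIED.value


assert infer_link_type(NodeType.DATA, NodeType.CALC) == 'inputlink'
assert infer_link_type(NodeType.CALC, NodeType.DATA) == 'createlink'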