Example #1
def migrate_v7_to_v8(metadata, data, *args):  # pylint: disable=unused-argument
    """Migration of export files from v0.7 to v0.8."""
    old_version = '0.7'
    new_version = '0.8'

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    # Apply migrations
    migration_default_link_label(data)
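The helper migration_default_link_label is not reproduced on this page. Judging from the check in test_migrate_v7_to_v8_complete further down (no migrated link may keep the label '_return'; it should now be 'result'), a minimal sketch of such a helper could look as follows. It only illustrates the expected effect and is not the aiida-core implementation:

def migration_default_link_label_sketch(data):
    """Rename the legacy default link label '_return' to 'result' (illustrative sketch)."""
    for link in data.get('links_uuid', []):
        if link.get('label') == '_return':
            link['label'] = 'result'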
Example #2
def migrate_v8_to_v9(metadata, data, *args):  # pylint: disable=unused-argument
    """Migration of export files from v0.8 to v0.9."""
    old_version = '0.8'
    new_version = '0.9'

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    # Apply migrations
    migration_dbgroup_type_string(data)
Example #3
    def test_migrate_v6_to_v7_complete(self):
        """Test migration for file containing complete v0.6 era possibilities"""
        # Get metadata.json and data.json as dicts from v0.6 file archive
        metadata, data = get_json_files('export_v0.6_manual.aiida',
                                        **self.external_archive)
        verify_metadata_version(metadata, version='0.6')

        # Migrate to v0.7
        migrate_v6_to_v7(metadata, data)
        verify_metadata_version(metadata, version='0.7')

        self.maxDiff = None  # pylint: disable=invalid-name
        # Check attributes of process.* nodes
        illegal_attrs = {
            '_sealed', '_finished', '_failed', '_aborted', '_do_abort'
        }
        new_attrs = {'sealed': True}
        for node_pk, attrs in data['node_attributes'].items():
            if data['export_data']['Node'][node_pk]['node_type'].startswith(
                    'process.'):
                # Check if illegal attributes were removed successfully
                for attr in illegal_attrs:
                    self.assertNotIn(
                        attr,
                        attrs,
                        msg=
                        "key '{}' should have been removed from attributes for Node <pk={}>"
                        .format(attr, node_pk))

                # Check new attributes were added successfully
                for attr in new_attrs:
                    self.assertIn(
                        attr,
                        attrs,
                        msg=
                        "key '{}' was not added to attributes for Node <pk={}>"
                        .format(attr, node_pk))
                    self.assertEqual(
                        attrs[attr],
                        new_attrs[attr],
                        msg=
                        "key '{}' should have had the value {}, but did instead have {}"
                        .format(attr, new_attrs[attr], attrs[attr]))

        # Check Attribute and Link have been removed
        illegal_entities = {'Attribute', 'Link'}
        for dict_ in {'unique_identifiers', 'all_fields_info'}:
            for entity in illegal_entities:
                self.assertNotIn(
                    entity,
                    metadata[dict_],
                    msg=
                    "key '{}' should have been removed from '{}' in metadata.json"
                    .format(entity, dict_))
Example #4
def migrate_v6_to_v7(metadata, data, *args):  # pylint: disable=unused-argument
    """Migration of export files from v0.6 to v0.7"""
    old_version = '0.6'
    new_version = '0.7'

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    # Apply migrations
    migration_data_migration_legacy_process_attributes(data)
    remove_attribute_link_metadata(metadata)
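Neither migration_data_migration_legacy_process_attributes nor remove_attribute_link_metadata is shown on this page. The assertions in test_migrate_v6_to_v7_complete above spell out their expected effect: the legacy attributes '_sealed', '_finished', '_failed', '_aborted' and '_do_abort' must disappear from process.* nodes, a 'sealed': True attribute must appear, and the 'Attribute' and 'Link' entries must be dropped from 'unique_identifiers' and 'all_fields_info' in metadata.json. Hedged sketches with that effect (illustration only, not the aiida-core code):

def migration_legacy_process_attributes_sketch(data):
    """Drop legacy state attributes from process nodes and mark them as sealed (sketch)."""
    illegal_attrs = ('_sealed', '_finished', '_failed', '_aborted', '_do_abort')
    for node_pk, attrs in data.get('node_attributes', {}).items():
        if data['export_data']['Node'][node_pk]['node_type'].startswith('process.'):
            for attr in illegal_attrs:
                attrs.pop(attr, None)
            attrs['sealed'] = True

def remove_attribute_link_metadata_sketch(metadata):
    """Drop the obsolete 'Attribute' and 'Link' entities from metadata.json (sketch)."""
    for field in ('unique_identifiers', 'all_fields_info'):
        for entity in ('Attribute', 'Link'):
            metadata[field].pop(entity, None)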
Example #5
def migrate_v5_to_v6(metadata, data, *args):  # pylint: disable=unused-argument
    """Migration of export files from v0.5 to v0.6"""
    old_version = '0.5'
    new_version = '0.6'

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    # Apply migrations
    migration_serialize_datetime_objects(data)
    migration_migrate_legacy_job_calculation_data(data)
Example #6
    def test_migrate_v2_to_v3_complete(self):
        """Test migration for file containing complete v0.2 era possibilities"""

        # Get metadata.json and data.json as dicts from v0.2 file archive
        metadata, data = get_json_files('export_v0.2.aiida',
                                        **self.external_archive)
        verify_metadata_version(metadata, version='0.2')

        # Migrate to v0.3
        migrate_v2_to_v3(metadata, data)
        verify_metadata_version(metadata, version='0.3')

        self.maxDiff = None  # pylint: disable=invalid-name
        # Check link types
        legal_link_types = {
            'unspecified', 'createlink', 'returnlink', 'inputlink', 'calllink'
        }
        for link in data['links_uuid']:
            self.assertIn(
                'type',
                link,
                msg="key 'type' was not added to link: {}".format(link))
            self.assertIn(link['type'], legal_link_types)

        # Check entity names
        legal_entity_names = {
            'Node', 'Link', 'Group', 'Computer', 'User', 'Attribute'
        }
        for field in {'unique_identifiers', 'all_fields_info'}:
            for entity, prop in metadata[field].items():
                self.assertIn(
                    entity,
                    legal_entity_names,
                    msg=
                    "'{}' should now be equal to anyone of these: {}, but is not"
                    .format(entity, legal_entity_names))

                if field == 'all_fields_info':
                    for value in prop.values():
                        if 'requires' in value:
                            self.assertIn(
                                value['requires'],
                                legal_entity_names,
                                msg=
                                "'{}' should now be equal to anyone of these: {}, but is not"
                                .format(value, legal_entity_names))

        for entity in data['export_data']:
            self.assertIn(
                entity,
                legal_entity_names,
                msg="'{}' should now be equal to anyone of these: {}, but is not"
                .format(entity, legal_entity_names))
Example #7
def migrate_v1_to_v2(metadata, data, *args):
    """
    Migration of export files from v0.1 to v0.2, which means generalizing the
    field names with respect to the database backend

    :param metadata: the content of an export archive metadata.json file
    :param data: the content of an export archive data.json file
    """
    old_version = '0.1'
    new_version = '0.2'

    old_start = 'aiida.djsite'
    new_start = 'aiida.backends.djsite'

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    def get_new_string(old_string):
        """Replace the old module prefix with the new."""
        if old_string.startswith(old_start):
            return '{}{}'.format(new_start, old_string[len(old_start):])

        return old_string

    def replace_requires(data):
        """Replace the requires keys with new module path."""
        if isinstance(data, dict):
            new_data = {}
            for key, value in data.items():
                if key == 'requires' and value.startswith(old_start):
                    new_data[key] = get_new_string(value)
                else:
                    new_data[key] = replace_requires(value)
            return new_data

        return data

    for field in ['export_data']:
        for key in list(data[field]):
            if key.startswith(old_start):
                new_key = get_new_string(key)
                data[field][new_key] = data[field][key]
                del data[field][key]

    for field in ['unique_identifiers', 'all_fields_info']:
        for key in list(metadata[field].keys()):
            if key.startswith(old_start):
                new_key = get_new_string(key)
                metadata[field][new_key] = metadata[field][key]
                del metadata[field][key]

    metadata['all_fields_info'] = replace_requires(metadata['all_fields_info'])
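For instance, applying the inner helper to a v0.1 entity key (a hypothetical value chosen to match the old prefix) shows the intended renaming:

# get_new_string('aiida.djsite.db.models.DbNode')
# -> 'aiida.backends.djsite.db.models.DbNode'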
Example #8
def migrate_v4_to_v5(metadata, data, *args):  # pylint: disable=unused-argument
    """
    Migration of export files from v0.4 to v0.5

    This is from migration 0034 (drop_node_columns_nodeversion_public) and onwards
    """
    old_version = '0.4'
    new_version = '0.5'

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    # Apply migrations
    migration_drop_node_columns_nodeversion_public(metadata, data)
    migration_drop_computer_transport_params(metadata, data)
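The helpers migration_drop_node_columns_nodeversion_public and migration_drop_computer_transport_params are not shown here. The assertions in test_migrate_v4_to_v5_complete further down indicate what they must achieve: 'nodeversion' and 'public' disappear from every Node and 'transport_params' from every Computer, both in the data.json entries and in metadata.json's 'all_fields_info'. A minimal sketch with that effect (illustration only, not the aiida-core code):

def drop_entity_fields_sketch(metadata, data, entity, fields):
    """Remove the given fields from all rows of `entity` in data.json and from metadata.json (sketch)."""
    for row in data['export_data'].get(entity, {}).values():
        for field in fields:
            row.pop(field, None)
    for field in fields:
        metadata['all_fields_info'][entity].pop(field, None)

# e.g. drop_entity_fields_sketch(metadata, data, 'Node', {'nodeversion', 'public'})
#      drop_entity_fields_sketch(metadata, data, 'Computer', {'transport_params'})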
Example #9
    def migrate(self, filename_archive, version_old, version_new, migration_method):
        """Migrate one of the archives from `aiida-export-migration-tests`.

        :param filename_archive: the relative file name of the archive
        :param version_old: version of the archive
        :param version_new: version to migrate to
        :param migration_method: the migration method that should convert between version_old and version_new
        :return: the migrated metadata and data as a tuple
        """
        metadata, data = get_json_files(filename_archive, **self.external_archive)
        verify_metadata_version(metadata, version=version_old)

        migration_method(metadata, data)
        verify_metadata_version(metadata, version=version_new)

        return metadata, data
Example #10
    def test_migrate_v5_to_v6_calc_states(self):
        """Test the data migration of legacy `JobCalcState` attributes.

        This test has to use a local archive because the current archive from the `aiida-export-migration-tests`
        module does not include a `CalcJobNode` with a legacy `state` attribute.
        """
        # Get metadata.json and data.json as dicts from v0.5 file archive
        metadata, data = get_json_files('export_v0.5_simple.aiida',
                                        **self.core_archive)
        verify_metadata_version(metadata, version='0.5')

        calc_job_node_type = 'process.calculation.calcjob.CalcJobNode.'
        node_data = data['export_data'].get('Node', {})
        node_attributes = data['node_attributes']
        calc_jobs = {}
        for pk, values in node_data.items():
            if values['node_type'] == calc_job_node_type and 'state' in data[
                    'node_attributes'].get(pk, {}):
                calc_jobs[pk] = data['node_attributes'][pk]['state']

        # Migrate to v0.6
        migrate_v5_to_v6(metadata, data)
        verify_metadata_version(metadata, version='0.6')

        node_attributes = data['node_attributes']

        # The export archive contains a single `CalcJobNode` that had `state=FINISHED`.
        for pk, state in calc_jobs.items():

            attributes = node_attributes[pk]

            if STATE_MAPPING[state].exit_status is not None:
                self.assertEqual(attributes['exit_status'],
                                 STATE_MAPPING[state].exit_status)

            if STATE_MAPPING[state].process_state is not None:
                self.assertEqual(attributes['process_state'],
                                 STATE_MAPPING[state].process_state)

            if STATE_MAPPING[state].process_status is not None:
                self.assertEqual(attributes['process_status'],
                                 STATE_MAPPING[state].process_status)

            self.assertEqual(attributes['process_label'],
                             'Legacy JobCalculation')
Example #11
    def test_migrate_v7_to_v8_complete(self):
        """Test migration for file containing complete v0.7 era possibilities"""
        # Get metadata.json and data.json as dicts from v0.7 file archive
        metadata, data = get_json_files('export_v0.7_manual.aiida',
                                        **self.external_archive)
        verify_metadata_version(metadata, version='0.7')

        # Migrate to v0.8
        migrate_v7_to_v8(metadata, data)
        verify_metadata_version(metadata, version='0.8')

        self.maxDiff = None  # pylint: disable=invalid-name
        # Check that no links have the label '_return', since it should now be 'result'
        illegal_label = '_return'
        for link in data.get('links_uuid'):
            self.assertFalse(
                link['label'] == illegal_label,
                msg='The illegal link label {} was not expected to be present - '
                "it should now be 'result'".format(illegal_label))
    def test_migrate_v4_to_v5_complete(self):
        """Test migration for file containing complete v0.4 era possibilities"""

        # Get metadata.json and data.json as dicts from v0.4 file archive
        metadata, data = get_json_files('export_v0.4.aiida',
                                        **self.external_archive)
        verify_metadata_version(metadata, version='0.4')

        # Migrate to v0.5
        migrate_v4_to_v5(metadata, data)
        verify_metadata_version(metadata, version='0.5')

        self.maxDiff = None  # pylint: disable=invalid-name
        # Check schema-changes
        removed_computer_attrs = {'transport_params'}
        removed_node_attrs = {'nodeversion', 'public'}
        for change in removed_computer_attrs:
            # data.json
            for computer in data['export_data']['Computer'].values():
                self.assertNotIn(change,
                                 computer,
                                 msg="'{}' unexpectedly found for {}".format(
                                     change, computer))
            # metadata.json
            self.assertNotIn(
                change,
                metadata['all_fields_info']['Computer'],
                msg="'{}' unexpectedly found in metadata.json for Computer".
                format(change))
        for change in removed_node_attrs:
            # data.json
            for node in data['export_data']['Node'].values():
                self.assertNotIn(change,
                                 node,
                                 msg="'{}' unexpectedly found for {}".format(
                                     change, node))
            # metadata.json
            self.assertNotIn(
                change,
                metadata['all_fields_info']['Node'],
                msg="'{}' unexpectedly found in metadata.json for Node".format(
                    change))
Example #13
def migration_data(request):
    """For a given tuple of two subsequent versions and corresponding migration method, return metadata and data."""
    version_old, version_new, migration_method = request.param

    filepath_archive = 'export_v{}_simple.aiida'.format(version_new)
    metadata_new, data_new = get_json_files(filepath_archive,
                                            filepath='export/migrate')
    verify_metadata_version(metadata_new, version=version_new)

    filepath_archive = get_archive_file(
        'export_v{}_simple.aiida'.format(version_old),
        filepath='export/migrate')

    with Archive(filepath_archive) as archive:
        metadata_old = copy.deepcopy(archive.meta_data)
        data_old = copy.deepcopy(archive.data)

        migration_method(metadata_old, data_old, archive.folder)
        verify_metadata_version(metadata_old, version=version_new)

    yield version_old, version_new, metadata_old, metadata_new, data_old, data_new
Example #14
    def test_migrate_v5_to_v6_complete(self):
        """Test migration for file containing complete v0.5 era possibilities"""
        # Get metadata.json and data.json as dicts from v0.5 file archive
        metadata, data = get_json_files('export_v0.5_manual.aiida',
                                        **self.external_archive)
        verify_metadata_version(metadata, version='0.5')

        # Migrate to v0.6
        migrate_v5_to_v6(metadata, data)
        verify_metadata_version(metadata, version='0.6')

        self.maxDiff = None  # pylint: disable=invalid-name
        # Explicitly check that conversion dictionaries were removed
        illegal_data_dicts = {
            'node_attributes_conversion', 'node_extras_conversion'
        }
        for dict_ in illegal_data_dicts:
            self.assertNotIn(
                dict_,
                data,
                msg="dictionary '{}' should have been removed from data.json".
                format(dict_))
Example #15
    def test_migrate_v3_to_v4(self):
        """Test function migrate_v3_to_v4"""
        from aiida import get_version

        # Get metadata.json and data.json as dicts from v0.4 file archive
        metadata_v4, data_v4 = get_json_files('export_v0.4_simple.aiida', **self.core_archive)
        verify_metadata_version(metadata_v4, version='0.4')

        # Get metadata.json and data.json as dicts from v0.3 file archive
        # Cannot use 'get_json_files' for 'export_v0.3_simple.aiida',
        # because we need to pass the SandboxFolder to 'migrate_v3_to_v4'
        dirpath_archive = get_archive_file('export_v0.3_simple.aiida', **self.core_archive)

        with SandboxFolder(sandbox_in_repo=False) as folder:
            if zipfile.is_zipfile(dirpath_archive):
                extract_zip(dirpath_archive, folder, silent=True)
            elif tarfile.is_tarfile(dirpath_archive):
                extract_tar(dirpath_archive, folder, silent=True)
            else:
                raise ValueError('invalid file format, expected either a zip archive or gzipped tarball')

            try:
                with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle:
                    data_v3 = jsonload(fhandle)
                with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle:
                    metadata_v3 = jsonload(fhandle)
            except IOError:
                raise NotExistent('export archive does not contain the required file {}'.format(fhandle.name))

            verify_metadata_version(metadata_v3, version='0.3')

            # Migrate to v0.4
            migrate_v3_to_v4(metadata_v3, data_v3, folder)
            verify_metadata_version(metadata_v3, version='0.4')

        # Remove AiiDA version, since this may change regardless of the migration function
        metadata_v3.pop('aiida_version')
        metadata_v4.pop('aiida_version')

        # Assert conversion message in `metadata.json` is correct and then remove it for later assertions
        self.maxDiff = None  # pylint: disable=invalid-name
        conversion_message = 'Converted from version 0.3 to 0.4 with AiiDA v{}'.format(get_version())
        self.assertEqual(
            metadata_v3.pop('conversion_info')[-1],
            conversion_message,
            msg='The conversion message after migration is wrong'
        )
        metadata_v4.pop('conversion_info')

        # Assert changes were performed correctly
        self.assertDictEqual(
            metadata_v3,
            metadata_v4,
            msg='After migration, metadata.json should equal intended metadata.json from archives'
        )
        self.assertDictEqual(
            data_v3, data_v4, msg='After migration, data.json should equal intended data.json from archives'
        )
Example #16
    def test_migrate_v5_to_v6_datetime(self):
        """Test the data migration of serialized datetime objects.

        Datetime attributes were serialized into strings, by first converting to UTC and then printing with the format
        '%Y-%m-%dT%H:%M:%S.%f'. In the database migration, datetimes were serialized *including* timezone information.
        Here we test that the archive migration correctly reattaches the timezone information. The archive that we are
        using `export_v0.5_simple.aiida` contains a node with the attribute "scheduler_lastchecktime".
        """
        # Get metadata.json and data.json as dicts from v0.5 file archive
        metadata, data = get_json_files('export_v0.5_simple.aiida',
                                        **self.core_archive)
        verify_metadata_version(metadata, version='0.5')

        for key, values in data['node_attributes'].items():
            if 'scheduler_lastchecktime' not in values:
                continue

            serialized_original = values['scheduler_lastchecktime']
            msg = 'the serialized datetime before migration should not contain a plus: {}'.format(
                serialized_original)
            self.assertTrue('+' not in serialized_original, msg=msg)

            # Migrate to v0.6
            migrate_v5_to_v6(metadata, data)
            verify_metadata_version(metadata, version='0.6')

            serialized_migrated = data['node_attributes'][key][
                'scheduler_lastchecktime']
            self.assertEqual(serialized_migrated,
                             serialized_original + '+00:00')
            break

        else:
            raise RuntimeError(
                'the archive `export_v0.5_simple.aiida` did not contain a node with the attribute '
                '`scheduler_lastchecktime` which is required for this test.')
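The docstring above describes the underlying transformation: a naive datetime string serialized with '%Y-%m-%dT%H:%M:%S.%f' gains an explicit UTC offset during the migration. A self-contained sketch of that reserialization step (not the actual aiida-core helper) could be:

def reattach_utc_offset_sketch(serialized):
    """Append the UTC offset to a naive serialized datetime string (illustrative sketch)."""
    # '2018-01-01T00:00:00.000000' -> '2018-01-01T00:00:00.000000+00:00'
    return serialized if '+' in serialized else serialized + '+00:00'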
Example #17
    def test_migrate_v7_to_v8(self):
        """Test migration for file containing complete v0.7 era possibilities"""
        from aiida import get_version

        # Get metadata.json and data.json as dicts from v0.7 file archive
        metadata_v7, data_v7 = get_json_files('export_v0.7_simple.aiida',
                                              **self.core_archive)
        verify_metadata_version(metadata_v7, version='0.7')

        # Get metadata.json and data.json as dicts from v0.8 file archive
        metadata_v8, data_v8 = get_json_files('export_v0.8_simple.aiida',
                                              **self.core_archive)
        verify_metadata_version(metadata_v8, version='0.8')

        # Migrate to v0.8
        migrate_v7_to_v8(metadata_v7, data_v7)
        verify_metadata_version(metadata_v7, version='0.8')

        # Remove AiiDA version, since this may change regardless of the migration function
        metadata_v7.pop('aiida_version')
        metadata_v8.pop('aiida_version')

        # Assert conversion message in `metadata.json` is correct and then remove it for later assertions
        self.maxDiff = None  # pylint: disable=invalid-name
        conversion_message = 'Converted from version 0.7 to 0.8 with AiiDA v{}'.format(
            get_version())
        self.assertEqual(metadata_v7.pop('conversion_info')[-1],
                         conversion_message,
                         msg='The conversion message after migration is wrong')
        metadata_v8.pop('conversion_info')

        # Assert changes were performed correctly
        self.assertDictEqual(
            metadata_v7,
            metadata_v8,
            msg=
            'After migration, metadata.json should equal intended metadata.json from archives'
        )
        self.assertDictEqual(
            data_v7,
            data_v8,
            msg=
            'After migration, data.json should equal intended data.json from archives'
        )
Example #18
    def test_migrate_v2_to_v3(self):
        """Test function migrate_v2_to_v3"""
        from aiida import get_version

        # Get metadata.json and data.json as dicts from v0.2 file archive
        metadata_v2, data_v2 = get_json_files('export_v0.2_simple.aiida',
                                              **self.core_archive)
        verify_metadata_version(metadata_v2, version='0.2')

        # Get metadata.json and data.json as dicts from v0.3 file archive
        metadata_v3, data_v3 = get_json_files('export_v0.3_simple.aiida',
                                              **self.core_archive)
        verify_metadata_version(metadata_v3, version='0.3')

        # Migrate to v0.3
        migrate_v2_to_v3(metadata_v2, data_v2)
        verify_metadata_version(metadata_v2, version='0.3')

        # Remove AiiDA version, since this may change regardless of the migration function
        metadata_v2.pop('aiida_version')
        metadata_v3.pop('aiida_version')

        # Assert conversion message in `metadata.json` is correct and then remove it for later assertions
        conversion_message = 'Converted from version 0.2 to 0.3 with AiiDA v{}'.format(
            get_version())
        self.assertEqual(metadata_v2.pop('conversion_info')[-1],
                         conversion_message,
                         msg='The conversion message after migration is wrong')
        metadata_v3.pop('conversion_info')

        # Assert changes were performed correctly
        self.maxDiff = None  # pylint: disable=invalid-name
        self.assertDictEqual(
            metadata_v2,
            metadata_v3,
            msg=
            'After migration, metadata.json should equal intended metadata.json from archives'
        )
        self.assertDictEqual(
            data_v2,
            data_v3,
            msg=
            'After migration, data.json should equal intended data.json from archives'
        )
Example #19
def migrate_v2_to_v3(metadata, data, *args):
    """
    Migration of export files from v0.2 to v0.3, which means adding the link
    types to the link entries and making the entity key names backend agnostic
    by effectively removing the prefix 'aiida.backends.djsite.db.models'

    :param data: the content of an export archive data.json file
    :param metadata: the content of an export archive metadata.json file
    """

    old_version = '0.2'
    new_version = '0.3'

    class LinkType(enum.Enum):  # pylint: disable=too-few-public-methods
        """This was the state of the `aiida.common.links.LinkType` enum before aiida-core v1.0.0a5"""

        UNSPECIFIED = 'unspecified'
        CREATE = 'createlink'
        RETURN = 'returnlink'
        INPUT = 'inputlink'
        CALL = 'calllink'

    class NodeType(enum.Enum):  # pylint: disable=too-few-public-methods
        """A simple enum of relevant node types"""

        NONE = 'none'
        CALC = 'calculation'
        CODE = 'code'
        DATA = 'data'
        WORK = 'work'

    entity_map = {
        'aiida.backends.djsite.db.models.DbNode': 'Node',
        'aiida.backends.djsite.db.models.DbLink': 'Link',
        'aiida.backends.djsite.db.models.DbGroup': 'Group',
        'aiida.backends.djsite.db.models.DbComputer': 'Computer',
        'aiida.backends.djsite.db.models.DbUser': 'User',
        'aiida.backends.djsite.db.models.DbAttribute': 'Attribute'
    }

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    # Create a mapping from node uuid to node type
    mapping = {}
    for nodes in data['export_data'].values():
        for node in nodes.values():

            try:
                node_uuid = node['uuid']
                node_type_string = node['type']
            except KeyError:
                continue

            if node_type_string.startswith('calculation.job.'):
                node_type = NodeType.CALC
            elif node_type_string.startswith('calculation.inline.'):
                node_type = NodeType.CALC
            elif node_type_string.startswith('code.Code'):
                node_type = NodeType.CODE
            elif node_type_string.startswith('data.'):
                node_type = NodeType.DATA
            elif node_type_string.startswith('calculation.work.'):
                node_type = NodeType.WORK
            else:
                node_type = NodeType.NONE

            mapping[node_uuid] = node_type

    # For each link, deduce the link type and insert it in place
    for link in data['links_uuid']:

        try:
            input_type = NodeType(mapping[link['input']])
            output_type = NodeType(mapping[link['output']])
        except KeyError:
            raise DanglingLinkError('Unknown node UUID {} or {}'.format(
                link['input'], link['output']))

        # The following table demonstrates the logic for inferring the link type
        # (CODE, DATA) -> (WORK, CALC) : INPUT
        # (CALC)       -> (DATA)       : CREATE
        # (WORK)       -> (DATA)       : RETURN
        # (WORK)       -> (CALC, WORK) : CALL
        if input_type in [NodeType.CODE, NodeType.DATA
                          ] and output_type in [NodeType.CALC, NodeType.WORK]:
            link['type'] = LinkType.INPUT.value
        elif input_type == NodeType.CALC and output_type == NodeType.DATA:
            link['type'] = LinkType.CREATE.value
        elif input_type == NodeType.WORK and output_type == NodeType.DATA:
            link['type'] = LinkType.RETURN.value
        elif input_type == NodeType.WORK and output_type in [
                NodeType.CALC, NodeType.WORK
        ]:
            link['type'] = LinkType.CALL.value
        else:
            link['type'] = LinkType.UNSPECIFIED.value

    # Now we migrate the entity key names i.e. removing the 'aiida.backends.djsite.db.models' prefix
    for field in ['unique_identifiers', 'all_fields_info']:
        for old_key, new_key in entity_map.items():
            if old_key in metadata[field]:
                metadata[field][new_key] = metadata[field][old_key]
                del metadata[field][old_key]

    # Replace the 'requires' keys in the nested dictionaries in 'all_fields_info'
    for entity in metadata['all_fields_info'].values():
        for prop in entity.values():
            for key, value in prop.items():
                if key == 'requires' and value in entity_map:
                    prop[key] = entity_map[value]

    # Replace any present keys in the data.json
    for field in ['export_data']:
        for old_key, new_key in entity_map.items():
            if old_key in data[field]:
                data[field][new_key] = data[field][old_key]
                del data[field][old_key]
Example #20
    def test_migrate_v3_to_v4_complete(self):
        """Test migration for file containing complete v0.3 era possibilities"""

        # Get metadata.json and data.json as dicts from v0.3 file archive
        dirpath_archive = get_archive_file('export_v0.3.aiida', **self.external_archive)

        # Migrate
        with SandboxFolder(sandbox_in_repo=False) as folder:
            if zipfile.is_zipfile(dirpath_archive):
                extract_zip(dirpath_archive, folder, silent=True)
            elif tarfile.is_tarfile(dirpath_archive):
                extract_tar(dirpath_archive, folder, silent=True)
            else:
                raise ValueError('invalid file format, expected either a zip archive or gzipped tarball')

            try:
                with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle:
                    data = jsonload(fhandle)
                with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle:
                    metadata = jsonload(fhandle)
            except IOError:
                raise NotExistent('export archive does not contain the required file {}'.format(fhandle.name))

            verify_metadata_version(metadata, version='0.3')

            # Save pre-migration info
            links_count_org = len(data['links_uuid'])
            work_uuids = {
                value['uuid']
                for value in data['export_data']['Node'].values()
                if value['type'].startswith('calculation.function') or value['type'].startswith('calculation.work')
            }
            illegal_links = []
            for link in data['links_uuid']:
                if link['input'] in work_uuids and link['type'] == 'createlink':
                    illegal_links.append(link)

            # Migrate to v0.4
            migrate_v3_to_v4(metadata, data, folder)
            verify_metadata_version(metadata, version='0.4')

        ## The following checks are based on the archive file,
        ## which means there are more legal entities; they are simply not relevant here.

        self.maxDiff = None  # pylint: disable=invalid-name
        # Check schema-changes
        new_node_attrs = {'node_type', 'process_type'}
        for change in new_node_attrs:
            # data.json
            for node in data['export_data']['Node'].values():
                self.assertIn(change, node, msg="'{}' not found for {}".format(change, node))
            # metadata.json
            self.assertIn(
                change,
                metadata['all_fields_info']['Node'],
                msg="'{}' not found in metadata.json for Node".format(change)
            )

        # Check Node types
        legal_node_types = {
            'data.float.Float.', 'data.int.Int.', 'data.dict.Dict.', 'data.code.Code.', 'data.structure.StructureData.',
            'data.folder.FolderData.', 'data.remote.RemoteData.', 'data.upf.UpfData.', 'data.array.ArrayData.',
            'data.array.bands.BandsData.', 'data.array.kpoints.KpointsData.', 'data.array.trajectory.TrajectoryData.',
            'process.workflow.workchain.WorkChainNode.', 'process.calculation.calcjob.CalcJobNode.'
        }
        legal_process_types = {'', 'aiida.calculations:quantumespresso.pw'}
        for node in data['export_data']['Node'].values():
            self.assertIn(
                node['node_type'],
                legal_node_types,
                msg='{} is not a legal node_type. Legal node types: {}'.format(node['node_type'], legal_node_types)
            )
            self.assertIn(
                node['process_type'],
                legal_process_types,
                msg='{} is not a legal process_type. Legal process types: {}'.format(
                    node['process_type'], legal_process_types
                )
            )

        # Check links
        # Make sure the two illegal create links were removed during the migration
        self.assertEqual(
            len(data['links_uuid']),
            links_count_org - 2,
            msg='Two of the original {} links should have been removed during the migration, '
            'instead there are now {} links'.format(links_count_org, len(data['links_uuid']))
        )
        legal_link_types = {'unspecified', 'create', 'return', 'input_calc', 'input_work', 'call_calc', 'call_work'}
        for link in data['links_uuid']:
            self.assertIn(link['type'], legal_link_types)
        for link in illegal_links:
            self.assertNotIn(link, data['links_uuid'], msg='{} should not be in the migrated export file'.format(link))

        # Check Groups
        # There is one Group in the export file, it is a user group
        updated_attrs = {'label', 'type_string'}
        legal_group_type = {'user'}
        for attr in updated_attrs:
            # data.json
            for group in data['export_data']['Group'].values():
                self.assertIn(attr, group, msg='{} not found in Group {}'.format(attr, group))
                self.assertIn(
                    group['type_string'],
                    legal_group_type,
                    msg='{} is not a legal Group type_string'.format(group['type_string'])
                )
            # metadata.json
            self.assertIn(attr, metadata['all_fields_info']['Group'], msg='{} not found in metadata.json'.format(attr))

        # Check node_attributes*
        calcjob_nodes = []
        process_nodes = []
        for node_id, content in data['export_data']['Node'].items():
            if content['node_type'] == 'process.calculation.calcjob.CalcJobNode.':
                calcjob_nodes.append(node_id)
            elif content['node_type'].startswith('process.'):
                process_nodes.append(node_id)

        mandatory_updated_calcjob_attrs = {'resources', 'parser_name'}
        optional_updated_calcjob_attrs = {'custom_environment_variables': 'environment_variables'}
        updated_process_attrs = {'process_label'}
        fields = {'node_attributes', 'node_attributes_conversion'}
        for field in fields:
            for node_id in calcjob_nodes:
                for attr in mandatory_updated_calcjob_attrs:
                    self.assertIn(
                        attr,
                        data[field][node_id],
                        msg="Updated attribute name '{}' not found in {} for node_id: {}".format(attr, field, node_id)
                    )
                for old, new in optional_updated_calcjob_attrs.items():
                    self.assertNotIn(
                        old,
                        data[field][node_id],
                        msg="Old attribute '{}' found in {} for node_id: {}. "
                        "It should now be updated to '{}' or not exist".format(old, field, node_id, new)
                    )
            for node_id in process_nodes:
                for attr in updated_process_attrs:
                    self.assertIn(
                        attr,
                        data[field][node_id],
                        msg="Updated attribute name '{}' not found in {} for node_id: {}".format(attr, field, node_id)
                    )

        # Check TrajectoryData
        # There should be minimum one TrajectoryData in the export file
        trajectorydata_nodes = []
        for node_id, content in data['export_data']['Node'].items():
            if content['node_type'] == 'data.array.trajectory.TrajectoryData.':
                trajectorydata_nodes.append(node_id)

        updated_attrs = {'symbols'}
        fields = {'node_attributes', 'node_attributes_conversion'}
        for field in fields:
            for node_id in trajectorydata_nodes:
                for attr in updated_attrs:
                    self.assertIn(
                        attr,
                        data[field][node_id],
                        msg="Updated attribute name '{}' not found in {} for TrajecteoryData node_id: {}".format(
                            attr, field, node_id
                        )
                    )

        # Check Computer
        removed_attrs = {'enabled'}
        for attr in removed_attrs:
            # data.json
            for computer in data['export_data']['Computer'].values():
                self.assertNotIn(
                    attr, computer, msg="'{}' should have been removed from Computer {}".format(attr, computer['name'])
                )
            # metadata.json
            self.assertNotIn(
                attr,
                metadata['all_fields_info']['Computer'],
                msg="'{}' should have been removed from Computer in metadata.json".format(attr)
            )

        # Check new entities
        new_entities = {'Log', 'Comment'}
        fields = {'all_fields_info', 'unique_identifiers'}
        for entity in new_entities:
            for field in fields:
                self.assertIn(entity, metadata[field], msg='{} not found in {} in metadata.json'.format(entity, field))

        # Check extras
        # Dicts with keys equal to node_id and empty dicts as values should be present
        # This means they should have the same length as data['export_data']['Node'] or 'node_attributes*'
        attrs_count = len(data['node_attributes'])
        new_fields = {'node_extras', 'node_extras_conversion'}
        for field in new_fields:
            self.assertIn(field, list(data.keys()), msg="New field '{}' not found in data.json".format(field))
            self.assertEqual(
                len(data[field]),
                attrs_count,
                msg="New field '{}' found to have only {} entries, but should have had {} entries".format(
                    field, len(data[field]), attrs_count
                )
            )
Example #21
def migrate_v3_to_v4(metadata, data, folder, *args):  # pylint: disable=unused-argument
    """
    Migration of export files from v0.3 to v0.4

    Note concerning migration 0032 - REV. 1.0.32:
    Remove legacy workflow tables: DbWorkflow, DbWorkflowData, DbWorkflowStep
    These were (according to Antimo Marrazzo) never exported.
    """
    old_version = '0.3'
    new_version = '0.4'

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    # Apply migrations in correct sequential order
    migration_base_data_plugin_type_string(data)
    migration_process_type(metadata, data)
    migration_code_sub_class_of_data(data)
    migration_add_node_uuid_unique_constraint(data)
    migration_migrate_builtin_calculations(data)
    migration_provenance_redesign(data)
    migration_dbgroup_name_to_label_type_to_type_string(metadata, data)
    migration_dbgroup_type_string_change_content(data)
    migration_calc_job_option_attribute_keys(data)
    migration_move_data_within_node_module(data)
    migration_trajectory_symbols_to_attribute(data, folder)
    migration_remove_node_prefix(data)
    migration_rename_parameter_data_to_dict(data)
    migration_dbnode_type_to_dbnode_node_type(metadata, data)
    migration_remove_dbcomputer_enabled(metadata, data)
    migration_replace_text_field_with_json_field(data)

    # Add Node Extras
    add_extras(data)

    # Update metadata.json with the new Log and Comment entities
    new_entities = {
        'Log': {
            'uuid': {},
            'time': {
                'convert_type': 'date'
            },
            'loggername': {},
            'levelname': {},
            'message': {},
            'metadata': {},
            'dbnode': {
                'related_name': 'dblogs',
                'requires': 'Node'
            }
        },
        'Comment': {
            'uuid': {},
            'ctime': {
                'convert_type': 'date'
            },
            'mtime': {
                'convert_type': 'date'
            },
            'content': {},
            'dbnode': {
                'related_name': 'dbcomments',
                'requires': 'Node'
            },
            'user': {
                'related_name': 'dbcomments',
                'requires': 'User'
            }
        }
    }
    metadata['all_fields_info'].update(new_entities)
    metadata['unique_identifiers'].update({'Log': 'uuid', 'Comment': 'uuid'})
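Taken together, the migration functions shown in these examples can be chained to bring an archive from v0.1 up to v0.9, the newest version appearing on this page. The driver below is a hypothetical sketch (the names MIGRATIONS and migrate_recursively_sketch are not taken from aiida-core); it relies only on the signatures shown above, where every function accepts extra positional arguments and migrate_v3_to_v4 is the only one that actually uses the extracted archive folder:

MIGRATIONS = (
    migrate_v1_to_v2,
    migrate_v2_to_v3,
    migrate_v3_to_v4,  # the only migration that really needs `folder`
    migrate_v4_to_v5,
    migrate_v5_to_v6,
    migrate_v6_to_v7,
    migrate_v7_to_v8,
    migrate_v8_to_v9,
)

def migrate_recursively_sketch(metadata, data, folder):
    """Apply every migration in order, assuming the archive starts at version 0.1.

    Each migration verifies the current version itself via verify_metadata_version
    and bumps it via update_metadata, so no extra bookkeeping is needed here.
    """
    for migration in MIGRATIONS:
        migration(metadata, data, folder)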