def _atomic_write(self, filepath=None):
    """Write the config as it is in memory, i.e. the contents of ``self.dictionary``, to disk.

    .. note:: this method will write the config from memory to a temporary file in the same directory as the
        target file ``filepath``. It will then use ``os.rename`` to move the temporary file to ``filepath``,
        which will be overwritten if it already exists. The ``os.rename`` is the operation that gives the best
        guarantee of being atomic within the limitations of the application.

    :param filepath: optional filepath to write the contents to; if not specified, the default filename is used.
    """
    from .settings import DEFAULT_UMASK, DEFAULT_CONFIG_INDENT_SIZE

    umask = os.umask(DEFAULT_UMASK)

    if filepath is None:
        filepath = self.filepath

    # Create a temporary file in the same directory as the target filepath, which guarantees that the temporary
    # file is on the same filesystem, which is necessary to be able to use ``os.rename``. Since we are moving the
    # temporary file, we should also tell the tempfile to not be automatically deleted as that will raise.
    with tempfile.NamedTemporaryFile(dir=os.path.dirname(filepath), delete=False) as handle:
        try:
            json.dump(self.dictionary, handle, indent=DEFAULT_CONFIG_INDENT_SIZE)
        finally:
            os.umask(umask)

        handle.flush()
        os.rename(handle.name, filepath)
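# The method above relies on the "write to a temporary file in the target directory,
# then ``os.rename``" pattern to make the overwrite atomic. Below is a minimal,
# standalone sketch of that pattern using only the standard library; the helper name
# ``atomic_json_dump`` and its signature are illustrative, not part of the
# configuration class above.
import json
import os
import tempfile


def atomic_json_dump(data, filepath, indent=4):
    """Serialize ``data`` to ``filepath`` as JSON, replacing the file atomically."""
    directory = os.path.dirname(os.path.abspath(filepath))
    # The temporary file must live on the same filesystem as the target for the rename to be atomic.
    handle = tempfile.NamedTemporaryFile('w', dir=directory, delete=False, encoding='utf8')
    try:
        json.dump(data, handle, indent=indent)
        handle.flush()
        os.fsync(handle.fileno())
    finally:
        handle.close()
    # Atomic on POSIX when source and destination share a filesystem; cleanup of a
    # leftover temporary file on failure is omitted for brevity.
    os.rename(handle.name, filepath)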
def test_check_for_export_format_version(self):
    """Test the check for the export format version."""
    # Creating a folder for the import/export files
    export_file_tmp_folder = tempfile.mkdtemp()
    unpack_tmp_folder = tempfile.mkdtemp()
    try:
        struct = orm.StructureData()
        struct.store()

        filename = os.path.join(export_file_tmp_folder, 'export.tar.gz')
        export([struct], outfile=filename, silent=True)

        with tarfile.open(filename, 'r:gz', format=tarfile.PAX_FORMAT) as tar:
            tar.extractall(unpack_tmp_folder)

        with open(os.path.join(unpack_tmp_folder, 'metadata.json'), 'r', encoding='utf8') as fhandle:
            metadata = json.load(fhandle)
        metadata['export_version'] = 0.0

        with open(os.path.join(unpack_tmp_folder, 'metadata.json'), 'wb') as fhandle:
            json.dump(metadata, fhandle)

        with tarfile.open(filename, 'w:gz', format=tarfile.PAX_FORMAT) as tar:
            tar.add(unpack_tmp_folder, arcname='')

        self.tearDownClass()
        self.setUpClass()

        with self.assertRaises(exceptions.IncompatibleArchiveVersionError):
            import_data(filename, silent=True)
    finally:
        # Deleting the created temporary folders
        shutil.rmtree(export_file_tmp_folder, ignore_errors=True)
        shutil.rmtree(unpack_tmp_folder, ignore_errors=True)
def test_check_for_export_format_version(aiida_profile, tmp_path):
    """Test the check for the export format version."""
    # Creating a folder for the archive files
    export_file_tmp_folder = tmp_path / 'export_tmp'
    export_file_tmp_folder.mkdir()
    unpack_tmp_folder = tmp_path / 'unpack_tmp'
    unpack_tmp_folder.mkdir()

    aiida_profile.reset_db()

    struct = orm.StructureData()
    struct.store()

    filename = str(export_file_tmp_folder / 'export.aiida')
    export([struct], filename=filename, file_format='tar.gz')

    with tarfile.open(filename, 'r:gz', format=tarfile.PAX_FORMAT) as tar:
        tar.extractall(unpack_tmp_folder)

    with (unpack_tmp_folder / 'metadata.json').open('r', encoding='utf8') as fhandle:
        metadata = json.load(fhandle)
    metadata['export_version'] = 0.0

    with (unpack_tmp_folder / 'metadata.json').open('wb') as fhandle:
        json.dump(metadata, fhandle)

    with tarfile.open(filename, 'w:gz', format=tarfile.PAX_FORMAT) as tar:
        tar.add(unpack_tmp_folder, arcname='')

    aiida_profile.reset_db()

    with pytest.raises(exceptions.IncompatibleArchiveVersionError):
        import_data(filename)
def prepare_for_submission(self, folder: Folder) -> CalcInfo:
    """Prepare the calculation for submission.

    :param folder: a temporary folder on the local file system.
    :returns: the `CalcInfo` instance
    """
    echo_value = "fail" if self.node.get_option("fail_calcjob") else "success"

    with folder.open(self.options.input_filename, "w", encoding="utf8") as handle:
        handle.write(f"sleep {self.inputs.time.value}\n")
        handle.write(f'echo "{echo_value}"\n')

    with folder.open(self.options.payload_filename, "wb") as handle:
        json.dump(self.inputs.payload.get_dict(), handle)

    codeinfo = CodeInfo()
    codeinfo.code_uuid = self.inputs.code.uuid
    codeinfo.stdin_name = self.options.input_filename
    codeinfo.stdout_name = self.options.output_filename

    calcinfo = CalcInfo()
    calcinfo.codes_info = [codeinfo]
    calcinfo.retrieve_list = [
        self.options.output_filename,
        self.options.payload_filename,
    ]

    return calcinfo
def store(self):
    """Write the current config to file.

    .. note:: if the configuration file already exists on disk and its contents differ from those in memory, a
        backup of the original file on disk will be created before overwriting it.

    :return: self
    """
    from aiida.common.files import md5_from_filelike, md5_file
    from .settings import DEFAULT_CONFIG_INDENT_SIZE

    # If the filepath of this configuration does not yet exist, simply write it.
    if not os.path.isfile(self.filepath):
        self._atomic_write()
        return self

    # Otherwise, we write the content to a temporary file and compare its md5 checksum with the current config on
    # disk. When the checksums differ, we first create a backup and only then overwrite the existing file.
    with tempfile.NamedTemporaryFile() as handle:
        json.dump(self.dictionary, handle, indent=DEFAULT_CONFIG_INDENT_SIZE)
        handle.seek(0)

        if md5_from_filelike(handle) != md5_file(self.filepath):
            self._backup(self.filepath)

    self._atomic_write()

    return self
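# ``store`` above decides whether to create a backup by comparing md5 checksums of the
# in-memory configuration and the file on disk. The following is a rough,
# standard-library-only sketch of that comparison; ``config_changed_on_disk`` is a
# hypothetical helper and does not reproduce ``aiida.common.files`` exactly.
import hashlib
import json
import os


def config_changed_on_disk(dictionary, filepath, indent=4):
    """Return True if the JSON serialization of ``dictionary`` differs from the file contents."""
    if not os.path.isfile(filepath):
        return True

    # Serialize the in-memory configuration the same way it would be written to disk.
    in_memory = json.dumps(dictionary, indent=indent).encode('utf8')
    with open(filepath, 'rb') as handle:
        on_disk = handle.read()

    return hashlib.md5(in_memory).hexdigest() != hashlib.md5(on_disk).hexdigest()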
def test_dangling_link_to_existing_db_node(self, temp_dir):
    """A dangling link that references a Node that is not included in the archive should `not` be importable"""
    struct = orm.StructureData()
    struct.store()
    struct_uuid = struct.uuid

    calc = orm.CalculationNode()
    calc.add_incoming(struct, LinkType.INPUT_CALC, 'input')
    calc.store()
    calc.seal()
    calc_uuid = calc.uuid

    filename = os.path.join(temp_dir, 'export.aiida')
    export([struct], filename=filename, file_format='tar.gz')

    unpack = SandboxFolder()
    with tarfile.open(filename, 'r:gz', format=tarfile.PAX_FORMAT) as tar:
        tar.extractall(unpack.abspath)

    with open(unpack.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle:
        data = json.load(fhandle)
    data['links_uuid'].append({
        'output': calc.uuid,
        'input': struct.uuid,
        'label': 'input',
        'type': LinkType.INPUT_CALC.value
    })

    with open(unpack.get_abs_path('data.json'), 'wb') as fhandle:
        json.dump(data, fhandle)

    with tarfile.open(filename, 'w:gz', format=tarfile.PAX_FORMAT) as tar:
        tar.add(unpack.abspath, arcname='')

    # Make sure the CalculationNode is still in the database
    builder = orm.QueryBuilder().append(orm.CalculationNode, project='uuid')
    self.assertEqual(
        builder.count(),
        1,
        msg=f'There should be a single CalculationNode, instead {builder.count()} has been found'
    )
    self.assertEqual(builder.all()[0][0], calc_uuid)

    with self.assertRaises(DanglingLinkError):
        import_data(filename)

    # Using the flag `ignore_unknown_nodes` should import it without problems
    import_data(filename, ignore_unknown_nodes=True)
    builder = orm.QueryBuilder().append(orm.StructureData, project='uuid')
    self.assertEqual(
        builder.count(),
        1,
        msg=f'There should be a single StructureData, instead {builder.count()} has been found'
    )
    self.assertEqual(builder.all()[0][0], struct_uuid)
def _store_backup_info(self, backup_info_file_name):
    """
    This method writes the backup variables dictionary to a file with the given filename.
    """
    backup_variables = self._dictionarize_backup_info()
    with io.open(backup_info_file_name, 'wb') as backup_info_file:
        json.dump(backup_variables, backup_info_file)
def migrate(input_file, output_file, force, silent, archive_format):  # pylint: disable=too-many-locals,too-many-statements,too-many-branches
    """
    Migrate an old export archive file to the most recent format.
    """
    import tarfile
    import zipfile

    from aiida.common import json
    from aiida.common.folders import SandboxFolder
    from aiida.tools.importexport import migration, extract_zip, extract_tar

    if os.path.exists(output_file) and not force:
        echo.echo_critical('the output file already exists')

    with SandboxFolder(sandbox_in_repo=False) as folder:

        if zipfile.is_zipfile(input_file):
            extract_zip(input_file, folder, silent=silent)
        elif tarfile.is_tarfile(input_file):
            extract_tar(input_file, folder, silent=silent)
        else:
            echo.echo_critical('invalid file format, expected either a zip archive or gzipped tarball')

        try:
            with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle:
                data = json.load(fhandle)

            with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle:
                metadata = json.load(fhandle)
        except IOError:
            echo.echo_critical('export archive does not contain the required file {}'.format(fhandle.filename))

        old_version = migration.verify_metadata_version(metadata)
        new_version = migration.migrate_recursively(metadata, data, folder)

        with io.open(folder.get_abs_path('data.json'), 'wb') as fhandle:
            json.dump(data, fhandle, indent=4)

        with io.open(folder.get_abs_path('metadata.json'), 'wb') as fhandle:
            json.dump(metadata, fhandle)

        if archive_format in ['zip', 'zip-uncompressed']:
            compression = zipfile.ZIP_DEFLATED if archive_format == 'zip' else zipfile.ZIP_STORED
            with zipfile.ZipFile(output_file, mode='w', compression=compression, allowZip64=True) as archive:
                src = folder.abspath
                for dirpath, dirnames, filenames in os.walk(src):
                    relpath = os.path.relpath(dirpath, src)
                    for filename in dirnames + filenames:
                        real_src = os.path.join(dirpath, filename)
                        real_dest = os.path.join(relpath, filename)
                        archive.write(real_src, real_dest)
        elif archive_format == 'tar.gz':
            with tarfile.open(output_file, 'w:gz', format=tarfile.PAX_FORMAT, dereference=True) as archive:
                archive.add(folder.abspath, arcname='')

    if not silent:
        echo.echo_success('migrated the archive from version {} to {}'.format(old_version, new_version))
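# The ``migrate`` command above repacks the sandbox folder into a zip archive by walking
# the tree and writing each entry with a path relative to the folder root. Here is a
# self-contained sketch of just that repacking step; ``zip_directory`` is an illustrative
# helper, not part of the verdi CLI.
import os
import zipfile


def zip_directory(source, output_file, compressed=True):
    """Write the contents of ``source`` into ``output_file``, keeping paths relative to ``source``."""
    compression = zipfile.ZIP_DEFLATED if compressed else zipfile.ZIP_STORED
    with zipfile.ZipFile(output_file, mode='w', compression=compression, allowZip64=True) as archive:
        for dirpath, dirnames, filenames in os.walk(source):
            relpath = os.path.relpath(dirpath, source)
            # Directories are written as well, so empty folders survive the round trip.
            for filename in dirnames + filenames:
                archive.write(os.path.join(dirpath, filename), os.path.join(relpath, filename))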
def migrate_archive(input_file, output_file, silent=True):
    """Migrate contents using `migrate_recursively`

    This is essentially similar to `verdi export migrate`.
    However, since this command may be disabled, this function simulates it and keeps the tests working.

    :param input_file: filename with full path for archive to be migrated
    :param output_file: filename with full path for archive to be created after migration
    """
    from aiida.tools.importexport.migration import migrate_recursively

    # Unpack archive, migrate, and re-pack archive
    with SandboxFolder(sandbox_in_repo=False) as folder:
        if zipfile.is_zipfile(input_file):
            extract_zip(input_file, folder, silent=silent)
        elif tarfile.is_tarfile(input_file):
            extract_tar(input_file, folder, silent=silent)
        else:
            raise ValueError('invalid file format, expected either a zip archive or gzipped tarball')

        try:
            with open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle:
                data = json.load(fhandle)
            with open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle:
                metadata = json.load(fhandle)
        except IOError:
            raise NotExistent('export archive does not contain the required file {}'.format(fhandle.filename))

        # Migrate
        migrate_recursively(metadata, data, folder)

        # Write json files
        with open(folder.get_abs_path('data.json'), 'wb') as fhandle:
            json.dump(data, fhandle, indent=4)

        with open(folder.get_abs_path('metadata.json'), 'wb') as fhandle:
            json.dump(metadata, fhandle, indent=4)

        # Pack archive
        compression = zipfile.ZIP_DEFLATED
        with zipfile.ZipFile(output_file, mode='w', compression=compression, allowZip64=True) as archive:
            src = folder.abspath
            for dirpath, dirnames, filenames in os.walk(src):
                relpath = os.path.relpath(dirpath, src)
                for filename in dirnames + filenames:
                    real_src = os.path.join(dirpath, filename)
                    real_dest = os.path.join(relpath, filename)
                    archive.write(real_src, real_dest)
def export_workflow_data(apps, _):
    """Export existing legacy workflow data to a JSON file."""
    from tempfile import NamedTemporaryFile

    DbWorkflow = apps.get_model('db', 'DbWorkflow')
    DbWorkflowData = apps.get_model('db', 'DbWorkflowData')
    DbWorkflowStep = apps.get_model('db', 'DbWorkflowStep')

    count_workflow = DbWorkflow.objects.count()
    count_workflow_data = DbWorkflowData.objects.count()
    count_workflow_step = DbWorkflowStep.objects.count()

    # Nothing to do if all tables are empty
    if count_workflow == 0 and count_workflow_data == 0 and count_workflow_step == 0:
        return

    if not configuration.PROFILE.is_test_profile:
        echo.echo('\n')
        echo.echo_warning('The legacy workflow tables contain data but will have to be dropped to continue.')
        echo.echo_warning('If you continue, the content will be dumped to a JSON file, before dropping the tables.')
        echo.echo_warning('This serves merely as a reference and cannot be used to restore the database.')
        echo.echo_warning('If you want a proper backup, make sure to dump the full database and backup your repository')
        if not click.confirm('Are you sure you want to continue', default=True):
            sys.exit(1)

    delete_on_close = configuration.PROFILE.is_test_profile

    data = {
        'workflow': serializers.serialize('json', DbWorkflow.objects.all()),
        'workflow_data': serializers.serialize('json', DbWorkflowData.objects.all()),
        'workflow_step': serializers.serialize('json', DbWorkflowStep.objects.all()),
    }

    with NamedTemporaryFile(prefix='legacy-workflows', suffix='.json', dir='.', delete=delete_on_close, mode='wb') as handle:
        filename = handle.name
        json.dump(data, handle)

    # If delete_on_close is False, we are running for the user and add additional message of file location
    if not delete_on_close:
        echo.echo_info(f'Exported workflow data to {filename}')
def store(self):
    """Write the current config to file."""
    self._backup()

    umask = os.umask(DEFAULT_UMASK)

    try:
        with io.open(self.filepath, 'wb') as handle:
            json.dump(self.dictionary, handle, indent=DEFAULT_CONFIG_INDENT_SIZE)
    finally:
        os.umask(umask)

    return self
def _write(self, filelike):
    """Write the contents of `self.dictionary` to the given file handle.

    :param filelike: the filelike object to write the current configuration to
    """
    from .settings import DEFAULT_UMASK, DEFAULT_CONFIG_INDENT_SIZE

    umask = os.umask(DEFAULT_UMASK)

    try:
        json.dump(self.dictionary, filelike, indent=DEFAULT_CONFIG_INDENT_SIZE)
    finally:
        os.umask(umask)
def _write_object(self, path: str, ctype: str, content: Any):
    """Write an object from the cache to disk.

    :param path: relative path of file
    :param ctype: the type of the content
    :param content: the content to write
    """
    if ctype == 'text':
        (self._path / path).write_text(content, encoding=self._encoding)
    elif ctype == 'json':
        with (self._path / path).open(mode='wb') as handle:
            json.dump(content, handle)
    else:
        raise TypeError(f'Unknown content type: {ctype}')
def write_metadata(self, data: ArchiveMetadata):
    """Write the archive metadata to ``metadata.json`` in the archive folder."""
    metadata = {
        'export_version': self.export_version,
        'aiida_version': data.aiida_version,
        'all_fields_info': data.all_fields_info,
        'unique_identifiers': data.unique_identifiers,
        'export_parameters': {
            'graph_traversal_rules': data.graph_traversal_rules,
            'entities_starting_set': data.entities_starting_set,
            'include_comments': data.include_comments,
            'include_logs': data.include_logs,
        },
        'conversion_info': data.conversion_info
    }
    with self._folder.open('metadata.json', 'wb') as handle:
        json.dump(metadata, handle)
def close(self, excepted: bool):
    """Finalise the archive: write ``data.json``, close the zipfile and move it to its final path."""
    self.assert_within_context()
    if excepted:
        self._archivepath.close()
        shutil.rmtree(self._temp_path)
        return

    # write data.json
    with self._archivepath.joinpath('data.json').open('wb') as handle:
        json.dump(self._data, handle)

    # compress
    # close the zipfile to finalise write
    self._archivepath.close()
    # move the compressed file to the final path
    self._remove_filepath()
    shutil.move(str(self._archivepath.filepath), str(self.filepath))
    # remove the temporary folder
    shutil.rmtree(self._temp_path)
def export_workflow_data(connection):
    """Export existing legacy workflow data to a JSON file."""
    from tempfile import NamedTemporaryFile

    DbWorkflow = table('db_dbworkflow')
    DbWorkflowData = table('db_dbworkflowdata')
    DbWorkflowStep = table('db_dbworkflowstep')

    count_workflow = connection.execute(select([func.count()]).select_from(DbWorkflow)).scalar()
    count_workflow_data = connection.execute(select([func.count()]).select_from(DbWorkflowData)).scalar()
    count_workflow_step = connection.execute(select([func.count()]).select_from(DbWorkflowStep)).scalar()

    # Nothing to do if all tables are empty
    if count_workflow == 0 and count_workflow_data == 0 and count_workflow_step == 0:
        return

    if not configuration.PROFILE.is_test_profile:
        echo.echo('\n')
        echo.echo_warning('The legacy workflow tables contain data but will have to be dropped to continue.')
        echo.echo_warning('If you continue, the content will be dumped to a JSON file, before dropping the tables.')
        echo.echo_warning('This serves merely as a reference and cannot be used to restore the database.')
        echo.echo_warning('If you want a proper backup, make sure to dump the full database and backup your repository')
        if not click.confirm('Are you sure you want to continue', default=True):
            sys.exit(1)

    delete_on_close = configuration.PROFILE.is_test_profile

    data = {
        'workflow': [dict(row) for row in connection.execute(select(['*']).select_from(DbWorkflow))],
        'workflow_data': [dict(row) for row in connection.execute(select(['*']).select_from(DbWorkflowData))],
        'workflow_step': [dict(row) for row in connection.execute(select(['*']).select_from(DbWorkflowStep))],
    }

    with NamedTemporaryFile(
        prefix='legacy-workflows', suffix='.json', dir='.', delete=delete_on_close, mode='w+'
    ) as handle:
        filename = handle.name
        json.dump(data, handle, default=json_serializer)

    # If delete_on_close is False, we are running for the user and add additional message of file location
    if not delete_on_close:
        echo.echo_info('Exported workflow data to {}'.format(filename))
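# Both ``export_workflow_data`` variants above dump the collected data to a
# ``NamedTemporaryFile`` whose ``delete`` flag decides whether the reference copy
# survives after the context manager exits. A minimal sketch of that pattern follows,
# with purely illustrative names (``dump_reference_copy`` is not part of AiiDA).
import json
from tempfile import NamedTemporaryFile


def dump_reference_copy(data, keep_file, prefix='legacy-workflows', suffix='.json'):
    """Serialize ``data`` to a named temporary JSON file and return its path."""
    with NamedTemporaryFile(prefix=prefix, suffix=suffix, dir='.', delete=not keep_file, mode='w') as handle:
        json.dump(data, handle)
        # When ``keep_file`` is True the file is not deleted on close, so the returned
        # path can be reported to the user afterwards.
        return handle.name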
def test_links_to_unknown_nodes(self, temp_dir):
    """Test importing of nodes that have links to unknown nodes."""
    node_label = 'Test structure data'
    struct = orm.StructureData()
    struct.label = str(node_label)
    struct.store()
    struct_uuid = struct.uuid

    filename = os.path.join(temp_dir, 'export.aiida')
    export([struct], filename=filename, file_format='tar.gz')

    unpack = SandboxFolder()
    with tarfile.open(filename, 'r:gz', format=tarfile.PAX_FORMAT) as tar:
        tar.extractall(unpack.abspath)

    with open(unpack.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle:
        data = json.load(fhandle)
    data['links_uuid'].append({
        'output': struct.uuid,
        # note: this uuid is supposed to not be in the DB:
        'input': get_new_uuid(),
        'label': 'parent',
        'type': LinkType.CREATE.value
    })

    with open(unpack.get_abs_path('data.json'), 'wb') as fhandle:
        json.dump(data, fhandle)

    with tarfile.open(filename, 'w:gz', format=tarfile.PAX_FORMAT) as tar:
        tar.add(unpack.abspath, arcname='')

    self.clean_db()

    with self.assertRaises(DanglingLinkError):
        import_data(filename)

    import_data(filename, ignore_unknown_nodes=True)
    self.assertEqual(orm.load_node(struct_uuid).label, node_label)
def close(self, excepted: bool):
    """Finalise the archive by writing ``data.json``, unless an exception occurred."""
    self.assert_within_context()
    if excepted:
        return

    with self._folder.open('data.json', 'wb') as handle:
        json.dump(self._data, handle)
def migrate(input_file, output_file, force, silent, in_place, archive_format, version):  # pylint: disable=too-many-locals,too-many-statements,too-many-branches
    """Migrate an export archive to a more recent format version."""
    import tarfile
    import zipfile

    from aiida.common import json
    from aiida.common.folders import SandboxFolder
    from aiida.tools.importexport import migration, extract_zip, extract_tar, ArchiveMigrationError, EXPORT_VERSION

    if version is None:
        version = EXPORT_VERSION

    if in_place:
        if output_file:
            echo.echo_critical('output file specified together with --in-place flag')
        tempdir = tempfile.TemporaryDirectory()
        output_file = os.path.join(tempdir.name, 'archive.aiida')
    elif not output_file:
        echo.echo_critical(
            'no output file specified. Please add --in-place flag if you would like to migrate in place.'
        )

    if os.path.exists(output_file) and not force:
        echo.echo_critical('the output file already exists')

    with SandboxFolder(sandbox_in_repo=False) as folder:

        if zipfile.is_zipfile(input_file):
            extract_zip(input_file, folder, silent=silent)
        elif tarfile.is_tarfile(input_file):
            extract_tar(input_file, folder, silent=silent)
        else:
            echo.echo_critical('invalid file format, expected either a zip archive or gzipped tarball')

        try:
            with open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle:
                data = json.load(fhandle)

            with open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle:
                metadata = json.load(fhandle)
        except IOError:
            echo.echo_critical('export archive does not contain the required file {}'.format(fhandle.filename))

        old_version = migration.verify_metadata_version(metadata)

        if version <= old_version:
            echo.echo_success('nothing to be done - archive already at version {} >= {}'.format(old_version, version))
            return

        try:
            new_version = migration.migrate_recursively(metadata, data, folder, version)
        except ArchiveMigrationError as exception:
            echo.echo_critical(str(exception))

        with open(folder.get_abs_path('data.json'), 'wb') as fhandle:
            json.dump(data, fhandle, indent=4)

        with open(folder.get_abs_path('metadata.json'), 'wb') as fhandle:
            json.dump(metadata, fhandle)

        if archive_format in ['zip', 'zip-uncompressed']:
            compression = zipfile.ZIP_DEFLATED if archive_format == 'zip' else zipfile.ZIP_STORED
            with zipfile.ZipFile(output_file, mode='w', compression=compression, allowZip64=True) as archive:
                src = folder.abspath
                for dirpath, dirnames, filenames in os.walk(src):
                    relpath = os.path.relpath(dirpath, src)
                    for filename in dirnames + filenames:
                        real_src = os.path.join(dirpath, filename)
                        real_dest = os.path.join(relpath, filename)
                        archive.write(real_src, real_dest)
        elif archive_format == 'tar.gz':
            with tarfile.open(output_file, 'w:gz', format=tarfile.PAX_FORMAT, dereference=True) as archive:
                archive.add(folder.abspath, arcname='')

    if in_place:
        os.rename(output_file, input_file)
        tempdir.cleanup()

    if not silent:
        echo.echo_success('migrated the archive from version {} to {}'.format(old_version, new_version))
def run(self):
    """Run the backup."""
    conf_backup_folder_abs = self.create_dir(
        'Please provide the backup folder by providing the full path.',
        os.path.join(os.path.expanduser(AIIDA_CONFIG_FOLDER), self._conf_backup_folder_rel)
    )

    file_backup_folder_abs = self.create_dir(
        'Please provide the destination folder of the backup (normally in '
        'the previously provided backup folder).',
        os.path.join(conf_backup_folder_abs, self._file_backup_folder_rel)
    )

    # The template backup configuration file
    template_conf_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), self._backup_info_tmpl_filename)

    # Copy the sample configuration file to the backup folder
    try:
        shutil.copy(template_conf_path, conf_backup_folder_abs)
    except OSError:
        self._logger.error(
            'Error copying the file %s to the directory %s', template_conf_path, conf_backup_folder_abs
        )
        raise

    if utils.query_yes_no(
        'A sample configuration file was copied to {}. '
        'Would you like to see the configuration parameters explanation?'.format(conf_backup_folder_abs),
        default='yes'
    ):
        self.print_info()

    # Construct the path to the backup configuration file
    final_conf_filepath = os.path.join(conf_backup_folder_abs, self._backup_info_filename)

    # If the backup parameters are configured now
    if utils.query_yes_no('Would you like to configure the backup configuration file now?', default='yes'):

        # Ask questions to properly setup the backup variables
        backup_variables = self.construct_backup_variables(file_backup_folder_abs)

        with io.open(final_conf_filepath, 'wb') as backup_info_file:
            json.dump(backup_variables, backup_info_file)

    # If the backup parameters are configured manually
    else:
        sys.stdout.write(
            'Please rename the file {} '.format(self._backup_info_tmpl_filename) +
            'found in {} to '.format(conf_backup_folder_abs) +
            '{} and '.format(self._backup_info_filename) +
            'change the backup parameters accordingly.\n'
        )
        sys.stdout.write(
            'Please adapt the startup script accordingly to point to the ' +
            'correct backup configuration file. For the moment, it points ' +
            'to {}\n'.format(os.path.join(conf_backup_folder_abs, self._backup_info_filename))
        )

    script_content = \
        u"""#!/usr/bin/env python
import logging
from aiida.manage.configuration import load_profile

load_profile(profile='{}')

from aiida.manage.backup.backup_general import Backup

# Create the backup instance
backup_inst = Backup(backup_info_filepath="{}", additional_back_time_mins=2)

# Define the backup logging level
backup_inst._logger.setLevel(logging.INFO)

# Start the backup
backup_inst.run()
""".format(configuration.PROFILE.name, final_conf_filepath)

    # Script full path
    script_path = os.path.join(conf_backup_folder_abs, self._script_filename)

    # Write the contents to the script
    with io.open(script_path, 'w', encoding='utf8') as script_file:
        script_file.write(script_content)

    # Set the right permissions
    try:
        statistics = os.stat(script_path)
        os.chmod(script_path, statistics.st_mode | stat.S_IEXEC)
    except OSError:
        self._logger.error('Problem setting the right permissions to the script %s.', script_path)
        raise

    sys.stdout.write('Backup setup completed.\n')