class TestVerdiDataRemote(AiidaTestCase): """ Testing verdi data remote """ @classmethod def setUpClass(cls): super(TestVerdiDataRemote, cls).setUpClass() user = orm.User.objects.get_default() orm.AuthInfo(cls.computer, user).store() def setUp(self): comp = self.computer self.r = RemoteData() p = tempfile.mkdtemp() self.r.set_remote_path(p) with io.open(p + '/file.txt', 'w', encoding='utf8') as fhandle: fhandle.write(u'test string') self.r.computer = comp self.r.store() self.cli_runner = CliRunner() def test_remoteshowhelp(self): output = sp.check_output(['verdi', 'data', 'remote', 'show', '--help']) self.assertIn(b'Usage:', output, 'Sub-command verdi data remote show --help failed.') def test_remoteshow(self): options = [str(self.r.id)] res = self.cli_runner.invoke(cmd_remote.remote_show, options, catch_exceptions=False) self.assertEqual(res.exit_code, 0, 'The command verdi data remote show did not finish correctly') self.assertIn(b'Remote computer name:', res.stdout_bytes, 'The string "Remote computer name:" was not found in the' ' output of verdi data remote show') self.assertIn(b'Remote folder full path:', res.stdout_bytes, 'The string "Remote folder full path:" was not found in the' ' output of verdi data remote show') def test_remotelshelp(self): output = sp.check_output(['verdi', 'data', 'remote', 'ls', '--help']) self.assertIn(b'Usage:', output, 'Sub-command verdi data remote ls --help failed.') def test_remotels(self): options = ['--long', str(self.r.id)] res = self.cli_runner.invoke(cmd_remote.remote_ls, options, catch_exceptions=False) self.assertEqual(res.exit_code, 0, 'The command verdi data remote ls did not finish correctly') self.assertIn(b'file.txt', res.stdout_bytes, 'The file "file.txt" was not found in the output' ' of verdi data remote ls') def test_remotecathelp(self): output = sp.check_output(['verdi', 'data', 'remote', 'cat', '--help']) self.assertIn(b'Usage:', output, 'Sub-command verdi data remote cat --help failed.') def test_remotecat(self): options = [str(self.r.id), 'file.txt'] res = self.cli_runner.invoke(cmd_remote.remote_cat, options, catch_exceptions=False) self.assertEqual(res.exit_code, 0, 'The command verdi data remote cat did not finish correctly') self.assertIn(b'test string', res.stdout_bytes, 'The string "test string" was not found in the output' ' of verdi data remote cat file.txt')
class TestRemoteData(AiidaTestCase): """Test for the RemoteData class.""" @classmethod def setUpClass(cls): super(TestRemoteData, cls).setUpClass() user = User.objects.get_default() authinfo = AuthInfo(cls.computer, user) authinfo.store() def setUp(self): """Create a dummy RemoteData on the default computer.""" self.tmp_path = tempfile.mkdtemp() self.remote = RemoteData(computer=self.computer) self.remote.set_remote_path(self.tmp_path) with io.open(os.path.join(self.tmp_path, 'file.txt'), 'w', encoding='utf8') as fhandle: fhandle.write(u'test string') self.remote.computer = self.computer self.remote.store() def tearDown(self): """Delete the temporary path for the dummy RemoteData node.""" try: shutil.rmtree(self.tmp_path) except OSError as exception: if exception.errno == errno.ENOENT: pass elif exception.errno == errno.ENOTDIR: os.remove(self.tmp_path) else: raise IOError(exception) def test_clean(self): """Try cleaning a RemoteData node.""" self.assertFalse(self.remote.is_empty) self.remote._clean() self.assertTrue(self.remote.is_empty)
def upload_calculation(node, transport, calc_info, script_filename, dry_run=False): """Upload a `CalcJob` instance :param node: the `CalcJobNode`. :param transport: an already opened transport to use to submit the calculation. :param calc_info: the calculation info datastructure returned by `CalcJobNode.presubmit` :param script_filename: the job launch script returned by `CalcJobNode.presubmit` :return: tuple of ``calc_info`` and ``script_filename`` """ from logging import LoggerAdapter from tempfile import NamedTemporaryFile from aiida.orm import load_node, Code, RemoteData # If the calculation already has a `remote_folder`, simply return. The upload was apparently already completed # before, which can happen if the daemon is restarted and it shuts down after uploading but before getting the # chance to perform the state transition. Upon reloading this calculation, it will re-attempt the upload. link_label = 'remote_folder' if node.get_outgoing(RemoteData, link_label_filter=link_label).first(): execlogger.warning( 'CalcJobNode<{}> already has a `{}` output: skipping upload'. format(node.pk, link_label)) return calc_info, script_filename computer = node.computer codes_info = calc_info.codes_info input_codes = [ load_node(_.code_uuid, sub_classes=(Code, )) for _ in codes_info ] logger_extra = get_dblogger_extra(node) transport.set_logger_extra(logger_extra) logger = LoggerAdapter(logger=execlogger, extra=logger_extra) if not dry_run and node.has_cached_links(): raise ValueError( 'Cannot submit calculation {} because it has cached input links! If you just want to test the ' 'submission, set `metadata.dry_run` to True in the inputs.'.format( node.pk)) folder = node._raw_input_folder # If we are performing a dry-run, the working directory should actually be a local folder that should already exist if dry_run: workdir = transport.getcwd() else: remote_user = transport.whoami() # TODO Doc: {username} field # TODO: if something is changed here, fix also 'verdi computer test' remote_working_directory = computer.get_workdir().format( username=remote_user) if not remote_working_directory.strip(): raise exceptions.ConfigurationError( "[submission of calculation {}] No remote_working_directory configured for computer '{}'" .format(node.pk, computer.name)) # If it already exists, no exception is raised try: transport.chdir(remote_working_directory) except IOError: logger.debug( '[submission of calculation {}] Unable to chdir in {}, trying to create it' .format(node.pk, remote_working_directory)) try: transport.makedirs(remote_working_directory) transport.chdir(remote_working_directory) except EnvironmentError as exc: raise exceptions.ConfigurationError( '[submission of calculation {}] ' 'Unable to create the remote directory {} on ' "computer '{}': {}".format(node.pk, remote_working_directory, computer.name, exc)) # Store remotely with sharding (here is where we choose # the folder structure of remote jobs; then I store this # in the calculation properties using _set_remote_dir # and I do not have to know the logic, but I just need to # read the absolute path from the calculation properties. transport.mkdir(calc_info.uuid[:2], ignore_existing=True) transport.chdir(calc_info.uuid[:2]) transport.mkdir(calc_info.uuid[2:4], ignore_existing=True) transport.chdir(calc_info.uuid[2:4]) try: # The final directory may already exist, most likely because this function was already executed once, but # failed and as a result was rescheduled by the eninge. In this case it would be fine to delete the folder # and create it from scratch, except that we cannot be sure that this the actual case. Therefore, to err on # the safe side, we move the folder to the lost+found directory before recreating the folder from scratch transport.mkdir(calc_info.uuid[4:]) except OSError: # Move the existing directory to lost+found, log a warning and create a clean directory anyway path_existing = os.path.join(transport.getcwd(), calc_info.uuid[4:]) path_lost_found = os.path.join(remote_working_directory, REMOTE_WORK_DIRECTORY_LOST_FOUND) path_target = os.path.join(path_lost_found, calc_info.uuid) logger.warning( 'tried to create path {} but it already exists, moving the entire folder to {}' .format(path_existing, path_target)) # Make sure the lost+found directory exists, then copy the existing folder there and delete the original transport.mkdir(path_lost_found, ignore_existing=True) transport.copytree(path_existing, path_target) transport.rmtree(path_existing) # Now we can create a clean folder for this calculation transport.mkdir(calc_info.uuid[4:]) finally: transport.chdir(calc_info.uuid[4:]) # I store the workdir of the calculation for later file retrieval workdir = transport.getcwd() node.set_remote_workdir(workdir) # I first create the code files, so that the code can put # default files to be overwritten by the plugin itself. # Still, beware! The code file itself could be overwritten... # But I checked for this earlier. for code in input_codes: if code.is_local(): # Note: this will possibly overwrite files for f in code.get_folder_list(): transport.put(code.get_abs_path(f), f) transport.chmod(code.get_local_executable(), 0o755) # rwxr-xr-x # In a dry_run, the working directory is the raw input folder, which will already contain these resources if not dry_run: for filename in folder.get_content_list(): logger.debug( '[submission of calculation {}] copying file/folder {}...'. format(node.pk, filename)) transport.put(folder.get_abs_path(filename), filename) # local_copy_list is a list of tuples, each with (uuid, dest_rel_path) # NOTE: validation of these lists are done inside calculation.presubmit() local_copy_list = calc_info.local_copy_list or [] remote_copy_list = calc_info.remote_copy_list or [] remote_symlink_list = calc_info.remote_symlink_list or [] for uuid, filename, target in local_copy_list: logger.debug( '[submission of calculation {}] copying local file/folder to {}'. format(node.pk, target)) try: data_node = load_node(uuid=uuid) except exceptions.NotExistent: logger.warning( 'failed to load Node<{}> specified in the `local_copy_list`'. format(uuid)) # Note, once #2579 is implemented, use the `node.open` method instead of the named temporary file in # combination with the new `Transport.put_object_from_filelike` # Since the content of the node could potentially be binary, we read the raw bytes and pass them on with NamedTemporaryFile(mode='wb+') as handle: handle.write(data_node.get_object_content(filename, mode='rb')) handle.flush() handle.seek(0) transport.put(handle.name, target) if dry_run: if remote_copy_list: with open(os.path.join(workdir, '_aiida_remote_copy_list.txt'), 'w') as handle: for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_copy_list: handle.write( 'would have copied {} to {} in working directory on remote {}' .format(remote_abs_path, dest_rel_path, computer.name)) if remote_symlink_list: with open(os.path.join(workdir, '_aiida_remote_symlink_list.txt'), 'w') as handle: for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_symlink_list: handle.write( 'would have created symlinks from {} to {} in working directory on remote {}' .format(remote_abs_path, dest_rel_path, computer.name)) else: for (remote_computer_uuid, remote_abs_path, dest_rel_path) in remote_copy_list: if remote_computer_uuid == computer.uuid: logger.debug( '[submission of calculation {}] copying {} remotely, directly on the machine {}' .format(node.pk, dest_rel_path, computer.name)) try: transport.copy(remote_abs_path, dest_rel_path) except (IOError, OSError): logger.warning( '[submission of calculation {}] Unable to copy remote resource from {} to {}! ' 'Stopping.'.format(node.pk, remote_abs_path, dest_rel_path)) raise else: raise NotImplementedError( '[submission of calculation {}] Remote copy between two different machines is ' 'not implemented yet'.format(node.pk)) for (remote_computer_uuid, remote_abs_path, dest_rel_path) in remote_symlink_list: if remote_computer_uuid == computer.uuid: logger.debug( '[submission of calculation {}] copying {} remotely, directly on the machine {}' .format(node.pk, dest_rel_path, computer.name)) try: transport.symlink(remote_abs_path, dest_rel_path) except (IOError, OSError): logger.warning( '[submission of calculation {}] Unable to create remote symlink from {} to {}! ' 'Stopping.'.format(node.pk, remote_abs_path, dest_rel_path)) raise else: raise IOError( 'It is not possible to create a symlink between two different machines for ' 'calculation {}'.format(node.pk)) if not dry_run: # Make sure that attaching the `remote_folder` with a link is the last thing we do. This gives the biggest # chance of making this method idempotent. That is to say, if a runner gets interrupted during this action, it # will simply retry the upload, unless we got here and managed to link it up, in which case we move to the next # task. Because in that case, the check for the existence of this link at the top of this function will exit # early from this command. remotedata = RemoteData(computer=computer, remote_path=workdir) remotedata.add_incoming(node, link_type=LinkType.CREATE, link_label='remote_folder') remotedata.store() return calc_info, script_filename
def generate_calcjob_node( self, entry_point_name, retrieved=None, computer_name="localhost", options=None, mark_completed=False, remote_path=None, input_nodes=None, ): """Fixture to generate a mock `CalcJobNode` for testing parsers. Parameters ---------- entry_point_name : str entry point name of the calculation class retrieved : aiida.orm.FolderData containing the file(s) to be parsed computer_name : str used to get or create a ``Computer``, by default 'localhost' options : None or dict any additional metadata options to set on the node remote_path : str path to a folder on the computer mark_completed : bool if True, set the process state to finished, and the exit_status = 0 input_nodes: dict mapping of link label to node Returns ------- aiida.orm.CalcJobNode instance with the `retrieved` node linked as outgoing """ from aiida.common.links import LinkType from aiida.engine import ExitCode, ProcessState from aiida.orm import CalcJobNode, Node, RemoteData from aiida.plugins.entry_point import format_entry_point_string process = self.get_calc_cls(entry_point_name) computer = self.get_or_create_computer(computer_name) entry_point = format_entry_point_string("aiida.calculations", entry_point_name) calc_node = CalcJobNode(computer=computer, process_type=entry_point) calc_node.set_options({ k: v.default() if callable(v.default) else v.default for k, v in process.spec_options.items() if v.has_default() }) calc_node.set_option("resources", { "num_machines": 1, "num_mpiprocs_per_machine": 1 }) calc_node.set_option("max_wallclock_seconds", 1800) if options: calc_node.set_options(options) if mark_completed: calc_node.set_process_state(ProcessState.FINISHED) calc_node.set_exit_status(ExitCode().status) if input_nodes is not None: for label, in_node in input_nodes.items(): in_node_map = in_node if isinstance(in_node, Node): in_node_map = {None: in_node_map} for sublabel, in_node in in_node_map.items(): in_node.store() link_label = (label if sublabel is None else "{}__{}".format(label, sublabel)) calc_node.add_incoming(in_node, link_type=LinkType.INPUT_CALC, link_label=link_label) calc_node.store() if retrieved is not None: retrieved.add_incoming(calc_node, link_type=LinkType.CREATE, link_label="retrieved") retrieved.store() if remote_path is not None: remote = RemoteData(remote_path=remote_path, computer=computer) remote.add_incoming(calc_node, link_type=LinkType.CREATE, link_label="remote_folder") remote.store() return calc_node
def upload_calculation(node, transport, calc_info, folder, inputs=None, dry_run=False): """Upload a `CalcJob` instance :param node: the `CalcJobNode`. :param transport: an already opened transport to use to submit the calculation. :param calc_info: the calculation info datastructure returned by `CalcJob.presubmit` :param folder: temporary local file system folder containing the inputs written by `CalcJob.prepare_for_submission` """ # pylint: disable=too-many-locals,too-many-branches,too-many-statements from logging import LoggerAdapter from tempfile import NamedTemporaryFile from aiida.orm import load_node, Code, RemoteData # If the calculation already has a `remote_folder`, simply return. The upload was apparently already completed # before, which can happen if the daemon is restarted and it shuts down after uploading but before getting the # chance to perform the state transition. Upon reloading this calculation, it will re-attempt the upload. link_label = 'remote_folder' if node.get_outgoing(RemoteData, link_label_filter=link_label).first(): execlogger.warning( f'CalcJobNode<{node.pk}> already has a `{link_label}` output: skipping upload' ) return calc_info computer = node.computer codes_info = calc_info.codes_info input_codes = [ load_node(_.code_uuid, sub_classes=(Code, )) for _ in codes_info ] logger_extra = get_dblogger_extra(node) transport.set_logger_extra(logger_extra) logger = LoggerAdapter(logger=execlogger, extra=logger_extra) if not dry_run and node.has_cached_links(): raise ValueError( 'Cannot submit calculation {} because it has cached input links! If you just want to test the ' 'submission, set `metadata.dry_run` to True in the inputs.'.format( node.pk)) # If we are performing a dry-run, the working directory should actually be a local folder that should already exist if dry_run: workdir = transport.getcwd() else: remote_user = transport.whoami() remote_working_directory = computer.get_workdir().format( username=remote_user) if not remote_working_directory.strip(): raise exceptions.ConfigurationError( "[submission of calculation {}] No remote_working_directory configured for computer '{}'" .format(node.pk, computer.label)) # If it already exists, no exception is raised try: transport.chdir(remote_working_directory) except IOError: logger.debug( '[submission of calculation {}] Unable to chdir in {}, trying to create it' .format(node.pk, remote_working_directory)) try: transport.makedirs(remote_working_directory) transport.chdir(remote_working_directory) except EnvironmentError as exc: raise exceptions.ConfigurationError( '[submission of calculation {}] ' 'Unable to create the remote directory {} on ' "computer '{}': {}".format(node.pk, remote_working_directory, computer.label, exc)) # Store remotely with sharding (here is where we choose # the folder structure of remote jobs; then I store this # in the calculation properties using _set_remote_dir # and I do not have to know the logic, but I just need to # read the absolute path from the calculation properties. transport.mkdir(calc_info.uuid[:2], ignore_existing=True) transport.chdir(calc_info.uuid[:2]) transport.mkdir(calc_info.uuid[2:4], ignore_existing=True) transport.chdir(calc_info.uuid[2:4]) try: # The final directory may already exist, most likely because this function was already executed once, but # failed and as a result was rescheduled by the eninge. In this case it would be fine to delete the folder # and create it from scratch, except that we cannot be sure that this the actual case. Therefore, to err on # the safe side, we move the folder to the lost+found directory before recreating the folder from scratch transport.mkdir(calc_info.uuid[4:]) except OSError: # Move the existing directory to lost+found, log a warning and create a clean directory anyway path_existing = os.path.join(transport.getcwd(), calc_info.uuid[4:]) path_lost_found = os.path.join(remote_working_directory, REMOTE_WORK_DIRECTORY_LOST_FOUND) path_target = os.path.join(path_lost_found, calc_info.uuid) logger.warning( f'tried to create path {path_existing} but it already exists, moving the entire folder to {path_target}' ) # Make sure the lost+found directory exists, then copy the existing folder there and delete the original transport.mkdir(path_lost_found, ignore_existing=True) transport.copytree(path_existing, path_target) transport.rmtree(path_existing) # Now we can create a clean folder for this calculation transport.mkdir(calc_info.uuid[4:]) finally: transport.chdir(calc_info.uuid[4:]) # I store the workdir of the calculation for later file retrieval workdir = transport.getcwd() node.set_remote_workdir(workdir) # I first create the code files, so that the code can put # default files to be overwritten by the plugin itself. # Still, beware! The code file itself could be overwritten... # But I checked for this earlier. for code in input_codes: if code.is_local(): # Note: this will possibly overwrite files for filename in code.list_object_names(): # Note, once #2579 is implemented, use the `node.open` method instead of the named temporary file in # combination with the new `Transport.put_object_from_filelike` # Since the content of the node could potentially be binary, we read the raw bytes and pass them on with NamedTemporaryFile(mode='wb+') as handle: handle.write(code.get_object_content(filename, mode='rb')) handle.flush() transport.put(handle.name, filename) transport.chmod(code.get_local_executable(), 0o755) # rwxr-xr-x # local_copy_list is a list of tuples, each with (uuid, dest_rel_path) # NOTE: validation of these lists are done inside calculation.presubmit() local_copy_list = calc_info.local_copy_list or [] remote_copy_list = calc_info.remote_copy_list or [] remote_symlink_list = calc_info.remote_symlink_list or [] provenance_exclude_list = calc_info.provenance_exclude_list or [] for uuid, filename, target in local_copy_list: logger.debug( f'[submission of calculation {node.uuid}] copying local file/folder to {target}' ) def find_data_node(inputs, uuid): """Find and return the node with the given UUID from a nested mapping of input nodes. :param inputs: (nested) mapping of nodes :param uuid: UUID of the node to find :return: instance of `Node` or `None` if not found """ from collections.abc import Mapping data_node = None for input_node in inputs.values(): if isinstance(input_node, Mapping): data_node = find_data_node(input_node, uuid) elif isinstance(input_node, Node) and input_node.uuid == uuid: data_node = input_node if data_node is not None: break return data_node try: data_node = load_node(uuid=uuid) except exceptions.NotExistent: data_node = find_data_node(inputs, uuid) if data_node is None: logger.warning( f'failed to load Node<{uuid}> specified in the `local_copy_list`' ) else: dirname = os.path.dirname(target) if dirname: os.makedirs(os.path.join(folder.abspath, dirname), exist_ok=True) with folder.open(target, 'wb') as handle: with data_node.open(filename, 'rb') as source: shutil.copyfileobj(source, handle) provenance_exclude_list.append(target) # In a dry_run, the working directory is the raw input folder, which will already contain these resources if not dry_run: for filename in folder.get_content_list(): logger.debug( f'[submission of calculation {node.pk}] copying file/folder {filename}...' ) transport.put(folder.get_abs_path(filename), filename) for (remote_computer_uuid, remote_abs_path, dest_rel_path) in remote_copy_list: if remote_computer_uuid == computer.uuid: logger.debug( '[submission of calculation {}] copying {} remotely, directly on the machine {}' .format(node.pk, dest_rel_path, computer.label)) try: transport.copy(remote_abs_path, dest_rel_path) except (IOError, OSError): logger.warning( '[submission of calculation {}] Unable to copy remote resource from {} to {}! ' 'Stopping.'.format(node.pk, remote_abs_path, dest_rel_path)) raise else: raise NotImplementedError( '[submission of calculation {}] Remote copy between two different machines is ' 'not implemented yet'.format(node.pk)) for (remote_computer_uuid, remote_abs_path, dest_rel_path) in remote_symlink_list: if remote_computer_uuid == computer.uuid: logger.debug( '[submission of calculation {}] copying {} remotely, directly on the machine {}' .format(node.pk, dest_rel_path, computer.label)) try: transport.symlink(remote_abs_path, dest_rel_path) except (IOError, OSError): logger.warning( '[submission of calculation {}] Unable to create remote symlink from {} to {}! ' 'Stopping.'.format(node.pk, remote_abs_path, dest_rel_path)) raise else: raise IOError( f'It is not possible to create a symlink between two different machines for calculation {node.pk}' ) else: if remote_copy_list: with open(os.path.join(workdir, '_aiida_remote_copy_list.txt'), 'w') as handle: for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_copy_list: handle.write( 'would have copied {} to {} in working directory on remote {}' .format(remote_abs_path, dest_rel_path, computer.label)) if remote_symlink_list: with open(os.path.join(workdir, '_aiida_remote_symlink_list.txt'), 'w') as handle: for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_symlink_list: handle.write( 'would have created symlinks from {} to {} in working directory on remote {}' .format(remote_abs_path, dest_rel_path, computer.label)) # Loop recursively over content of the sandbox folder copying all that are not in `provenance_exclude_list`. Note # that directories are not created explicitly. The `node.put_object_from_filelike` call will create intermediate # directories for nested files automatically when needed. This means though that empty folders in the sandbox or # folders that would be empty when considering the `provenance_exclude_list` will *not* be copied to the repo. The # advantage of this explicit copying instead of deleting the files from `provenance_exclude_list` from the sandbox # first before moving the entire remaining content to the node's repository, is that in this way we are guaranteed # not to accidentally move files to the repository that should not go there at all cost. Note that all entries in # the provenance exclude list are normalized first, just as the paths that are in the sandbox folder, otherwise the # direct equality test may fail, e.g.: './path/file.txt' != 'path/file.txt' even though they reference the same file provenance_exclude_list = [ os.path.normpath(entry) for entry in provenance_exclude_list ] for root, _, filenames in os.walk(folder.abspath): for filename in filenames: filepath = os.path.join(root, filename) relpath = os.path.normpath( os.path.relpath(filepath, folder.abspath)) if relpath not in provenance_exclude_list: with open(filepath, 'rb') as handle: node._repository.put_object_from_filelike(handle, relpath, 'wb', force=True) # pylint: disable=protected-access if not dry_run: # Make sure that attaching the `remote_folder` with a link is the last thing we do. This gives the biggest # chance of making this method idempotent. That is to say, if a runner gets interrupted during this action, it # will simply retry the upload, unless we got here and managed to link it up, in which case we move to the next # task. Because in that case, the check for the existence of this link at the top of this function will exit # early from this command. remotedata = RemoteData(computer=computer, remote_path=workdir) remotedata.add_incoming(node, link_type=LinkType.CREATE, link_label='remote_folder') remotedata.store()