import os

# Import path for pymatgen >= 2022; older releases used `from pymatgen import Molecule`
from pymatgen.core import Molecule

from aiida.orm import Dict, StructureData
from aiida.plugins import CalculationFactory

import tests  # test package providing TEST_DIR


def test_gaussian(fixture_code):
    geometry_file = os.path.join(tests.TEST_DIR, 'data', 'ch4.xyz')
    expected_inp_file = os.path.join(tests.TEST_DIR, 'data', 'gaussian_test.inp')

    # structure
    structure = StructureData(pymatgen_molecule=Molecule.from_file(geometry_file))

    num_cores = 1
    memory_mb = 1000

    # Main parameters: geometry optimization
    parameters = {
        'link0_parameters': {
            '%chk': 'aiida.chk',
            '%mem': '%dMB' % memory_mb,
            '%nprocshared': str(num_cores),
        },
        'functional': 'BLYP',
        'basis_set': '6-31g',
        'charge': 0,
        'multiplicity': 1,
        'route_parameters': {
            'scf': {
                'maxcycle': 512,
                'cdiis': None,
            },
            'nosymm': None,
            'opt': None,
        },
    }

    # Build the inputs dictionary with a "fake" executable
    inputs = {
        'code': fixture_code('gaussian'),  # for a real run: load_code("gaussian09@localhost")
        'structure': structure,
        'parameters': Dict(dict=parameters),
        'metadata': {
            'options': {
                'resources': {
                    'num_machines': 1,
                    'tot_num_mpiprocs': 1,
                },
                'max_wallclock_seconds': 1800,
                'withmpi': False,
            },
        },
    }

    # Prepare the fake calculation for submission in a "sandbox folder"
    from aiida.engine.utils import instantiate_process
    from aiida.manage.manager import get_manager
    from aiida.common.folders import SandboxFolder

    manager = get_manager()
    runner = manager.get_runner()

    process_class = CalculationFactory('gaussian')
    process = instantiate_process(runner, process_class, **inputs)

    sandbox_folder = SandboxFolder()
    calc_info = process.prepare_for_submission(sandbox_folder)  # noqa: F841

    with sandbox_folder.open('aiida.inp') as handle:
        input_written = handle.read()

    with open(expected_inp_file, 'r') as f:
        expected_inp = f.read()

    assert input_written == expected_inp
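
# ---------------------------------------------------------------------------
# A minimal sketch (not part of the test) of submitting the same inputs to a
# real executable instead of the `fixture_code` stub. The code label
# 'gaussian09@localhost' comes from the commented-out hint above and is only
# an example; it assumes such a Code and an AiiDA profile are configured.
# Reuses the module-level `CalculationFactory` and `Dict` imports.
# ---------------------------------------------------------------------------
def run_gaussian_for_real(structure, parameters):
    from aiida import load_profile
    from aiida.engine import run
    from aiida.orm import load_code

    load_profile()

    builder = CalculationFactory('gaussian').get_builder()
    builder.code = load_code('gaussian09@localhost')  # example code label
    builder.structure = structure
    builder.parameters = Dict(dict=parameters)
    builder.metadata.options.resources = {'num_machines': 1, 'tot_num_mpiprocs': 1}
    builder.metadata.options.max_wallclock_seconds = 1800
    builder.metadata.options.withmpi = False
    # To test the submission without contacting the remote, AiiDA also supports:
    # builder.metadata.dry_run = True

    return run(builder)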
# --- Excerpt in the style of `aiida.engine.daemon.execmanager` (aiida-core 1.x) ---
# Imports and module constants needed by this excerpt:
import os
import shutil
from logging import LoggerAdapter
from tempfile import NamedTemporaryFile
from typing import Any, Mapping as MappingType, Optional

from aiida.common import AIIDA_LOGGER, exceptions
from aiida.common.datastructures import CalcInfo
from aiida.common.folders import SandboxFolder
from aiida.common.links import LinkType
from aiida.orm import CalcJobNode, Code, RemoteData, load_node
from aiida.orm.utils.log import get_dblogger_extra
from aiida.transports import Transport

REMOTE_WORK_DIRECTORY_LOST_FOUND = 'lost+found'
EXEC_LOGGER = AIIDA_LOGGER.getChild('execmanager')


def upload_calculation(node: CalcJobNode,
                       transport: Transport,
                       calc_info: CalcInfo,
                       folder: SandboxFolder,
                       inputs: Optional[MappingType[str, Any]] = None,
                       dry_run: bool = False) -> None:
    """Upload a `CalcJob` instance.

    :param node: the `CalcJobNode`.
    :param transport: an already opened transport to use to submit the calculation.
    :param calc_info: the calculation info datastructure returned by `CalcJob.presubmit`
    :param folder: temporary local file system folder containing the inputs written by `CalcJob.prepare_for_submission`
    :param inputs: the dictionary of inputs of the calculation, used to resolve nodes listed in the `local_copy_list`
    :param dry_run: if True, only prepare the files in a local working directory instead of uploading to the remote
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements

    # If the calculation already has a `remote_folder`, simply return. The upload was apparently already completed
    # before, which can happen if the daemon is restarted and it shuts down after uploading but before getting the
    # chance to perform the state transition. Upon reloading this calculation, it will re-attempt the upload.
    link_label = 'remote_folder'
    if node.get_outgoing(RemoteData, link_label_filter=link_label).first():
        EXEC_LOGGER.warning(f'CalcJobNode<{node.pk}> already has a `{link_label}` output: skipping upload')
        return

    computer = node.computer

    codes_info = calc_info.codes_info
    input_codes = [load_node(_.code_uuid, sub_classes=(Code,)) for _ in codes_info]

    logger_extra = get_dblogger_extra(node)
    transport.set_logger_extra(logger_extra)
    logger = LoggerAdapter(logger=EXEC_LOGGER, extra=logger_extra)

    if not dry_run and node.has_cached_links():
        raise ValueError(
            'Cannot submit calculation {} because it has cached input links! If you just want to test the '
            'submission, set `metadata.dry_run` to True in the inputs.'.format(node.pk)
        )

    # If we are performing a dry-run, the working directory should actually be a local folder that should already exist
    if dry_run:
        workdir = transport.getcwd()
    else:
        remote_user = transport.whoami()
        remote_working_directory = computer.get_workdir().format(username=remote_user)
        if not remote_working_directory.strip():
            raise exceptions.ConfigurationError(
                "[submission of calculation {}] No remote_working_directory configured for computer '{}'".format(
                    node.pk, computer.label
                )
            )

        # If it already exists, no exception is raised
        try:
            transport.chdir(remote_working_directory)
        except IOError:
            logger.debug(
                '[submission of calculation {}] Unable to chdir in {}, trying to create it'.format(
                    node.pk, remote_working_directory
                )
            )
            try:
                transport.makedirs(remote_working_directory)
                transport.chdir(remote_working_directory)
            except EnvironmentError as exc:
                raise exceptions.ConfigurationError(
                    '[submission of calculation {}] '
                    'Unable to create the remote directory {} on '
                    "computer '{}': {}".format(node.pk, remote_working_directory, computer.label, exc)
                )

        # Store remotely with sharding (this is where the folder structure of remote jobs is chosen; the resulting
        # workdir is stored in the calculation properties via `set_remote_workdir`, so later code does not need to
        # know this logic and can simply read the absolute path from the calculation properties).
        transport.mkdir(calc_info.uuid[:2], ignore_existing=True)
        transport.chdir(calc_info.uuid[:2])
        transport.mkdir(calc_info.uuid[2:4], ignore_existing=True)
        transport.chdir(calc_info.uuid[2:4])

        try:
            # The final directory may already exist, most likely because this function was already executed once, but
            # failed and as a result was rescheduled by the engine. In this case it would be fine to delete the folder
            # and create it from scratch, except that we cannot be sure that this is the actual case. Therefore, to
            # err on the safe side, we move the folder to the lost+found directory before recreating the folder from
            # scratch.
            transport.mkdir(calc_info.uuid[4:])
        except OSError:
            # Move the existing directory to lost+found, log a warning and create a clean directory anyway
            path_existing = os.path.join(transport.getcwd(), calc_info.uuid[4:])
            path_lost_found = os.path.join(remote_working_directory, REMOTE_WORK_DIRECTORY_LOST_FOUND)
            path_target = os.path.join(path_lost_found, calc_info.uuid)
            logger.warning(
                f'tried to create path {path_existing} but it already exists, moving the entire folder to {path_target}'
            )

            # Make sure the lost+found directory exists, then copy the existing folder there and delete the original
            transport.mkdir(path_lost_found, ignore_existing=True)
            transport.copytree(path_existing, path_target)
            transport.rmtree(path_existing)

            # Now we can create a clean folder for this calculation
            transport.mkdir(calc_info.uuid[4:])
        finally:
            transport.chdir(calc_info.uuid[4:])

        # I store the workdir of the calculation for later file retrieval
        workdir = transport.getcwd()
        node.set_remote_workdir(workdir)

    # I first create the code files, so that the code can put
    # default files to be overwritten by the plugin itself.
    # Still, beware! The code file itself could be overwritten...
    # But I checked for this earlier.
    for code in input_codes:
        if code.is_local():
            # Note: this will possibly overwrite files
            for filename in code.list_object_names():
                # Note, once #2579 is implemented, use the `node.open` method instead of the named temporary file in
                # combination with the new `Transport.put_object_from_filelike`
                # Since the content of the node could potentially be binary, we read the raw bytes and pass them on
                with NamedTemporaryFile(mode='wb+') as handle:
                    handle.write(code.get_object_content(filename, mode='rb'))
                    handle.flush()
                    transport.put(handle.name, filename)
            transport.chmod(code.get_local_executable(), 0o755)  # rwxr-xr-x

    # local_copy_list is a list of tuples, each with (node_uuid, filename, target_relpath)
    # NOTE: validation of these lists is done inside calculation.presubmit()
    local_copy_list = calc_info.local_copy_list or []
    remote_copy_list = calc_info.remote_copy_list or []
    remote_symlink_list = calc_info.remote_symlink_list or []
    provenance_exclude_list = calc_info.provenance_exclude_list or []

    for uuid, filename, target in local_copy_list:
        logger.debug(f'[submission of calculation {node.uuid}] copying local file/folder to {target}')

        try:
            data_node = load_node(uuid=uuid)
        except exceptions.NotExistent:
            # `_find_data_node` is a helper defined elsewhere in this module that resolves the node from the inputs
            data_node = _find_data_node(inputs, uuid) if inputs else None

        if data_node is None:
            logger.warning(f'failed to load Node<{uuid}> specified in the `local_copy_list`')
        else:
            dirname = os.path.dirname(target)
            if dirname:
                os.makedirs(os.path.join(folder.abspath, dirname), exist_ok=True)
            with folder.open(target, 'wb') as handle:
                with data_node.open(filename, 'rb') as source:
                    shutil.copyfileobj(source, handle)
            provenance_exclude_list.append(target)

    # In a dry_run, the working directory is the raw input folder, which will already contain these resources
    if not dry_run:
        for filename in folder.get_content_list():
            logger.debug(f'[submission of calculation {node.pk}] copying file/folder {filename}...')
            transport.put(folder.get_abs_path(filename), filename)

        for (remote_computer_uuid, remote_abs_path, dest_rel_path) in remote_copy_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug(
                    '[submission of calculation {}] copying {} remotely, directly on the machine {}'.format(
                        node.pk, dest_rel_path, computer.label
                    )
                )
                try:
                    transport.copy(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning(
                        '[submission of calculation {}] Unable to copy remote resource from {} to {}! '
                        'Stopping.'.format(node.pk, remote_abs_path, dest_rel_path)
                    )
                    raise
            else:
                raise NotImplementedError(
                    '[submission of calculation {}] Remote copy between two different machines is '
                    'not implemented yet'.format(node.pk)
                )

        for (remote_computer_uuid, remote_abs_path, dest_rel_path) in remote_symlink_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug(
                    '[submission of calculation {}] copying {} remotely, directly on the machine {}'.format(
                        node.pk, dest_rel_path, computer.label
                    )
                )
                try:
                    transport.symlink(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning(
                        '[submission of calculation {}] Unable to create remote symlink from {} to {}! '
                        'Stopping.'.format(node.pk, remote_abs_path, dest_rel_path)
                    )
                    raise
            else:
                raise IOError(
                    f'It is not possible to create a symlink between two different machines for calculation {node.pk}'
                )
    else:
        if remote_copy_list:
            with open(os.path.join(workdir, '_aiida_remote_copy_list.txt'), 'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_copy_list:
                    handle.write(
                        'would have copied {} to {} in working directory on remote {}'.format(
                            remote_abs_path, dest_rel_path, computer.label
                        )
                    )

        if remote_symlink_list:
            with open(os.path.join(workdir, '_aiida_remote_symlink_list.txt'), 'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_symlink_list:
                    handle.write(
                        'would have created symlinks from {} to {} in working directory on remote {}'.format(
                            remote_abs_path, dest_rel_path, computer.label
                        )
                    )

    # Loop recursively over content of the sandbox folder copying all that are not in `provenance_exclude_list`. Note
    # that directories are not created explicitly. The `node.put_object_from_filelike` call will create intermediate
    # directories for nested files automatically when needed. This means though that empty folders in the sandbox or
    # folders that would be empty when considering the `provenance_exclude_list` will *not* be copied to the repo. The
    # advantage of this explicit copying, instead of deleting the files from `provenance_exclude_list` from the sandbox
    # first and then moving the entire remaining content to the node's repository, is that in this way we are
    # guaranteed not to accidentally move files to the repository that should not go there at all cost. Note that all
    # entries in the provenance exclude list are normalized first, just as the paths that are in the sandbox folder,
    # otherwise the direct equality test may fail, e.g.: './path/file.txt' != 'path/file.txt' even though they
    # reference the same file.
    provenance_exclude_list = [os.path.normpath(entry) for entry in provenance_exclude_list]

    for root, _, filenames in os.walk(folder.abspath):
        for filename in filenames:
            filepath = os.path.join(root, filename)
            relpath = os.path.normpath(os.path.relpath(filepath, folder.abspath))
            if relpath not in provenance_exclude_list:
                with open(filepath, 'rb') as handle:
                    node._repository.put_object_from_filelike(handle, relpath, 'wb', force=True)  # pylint: disable=protected-access

    if not dry_run:
        # Make sure that attaching the `remote_folder` with a link is the last thing we do. This gives the biggest
        # chance of making this method idempotent. That is to say, if a runner gets interrupted during this action, it
        # will simply retry the upload, unless we got here and managed to link it up, in which case we move to the next
        # task. Because in that case, the check for the existence of this link at the top of this function will exit
        # early from this command.
        remotedata = RemoteData(computer=computer, remote_path=workdir)
        remotedata.add_incoming(node, link_type=LinkType.CREATE, link_label='remote_folder')
        remotedata.store()
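
# ---------------------------------------------------------------------------
# Illustration only (hypothetical helper, not part of aiida-core): the
# sharding performed above splits the calculation UUID into
# <first two chars>/<next two chars>/<remainder>, which bounds the number of
# entries per remote directory.
# ---------------------------------------------------------------------------
def sharded_remote_workdir(remote_working_directory: str, uuid: str) -> str:
    """Return the sharded remote work directory for a calculation UUID.

    E.g. uuid 'd4e9f3...' maps to '<remote_working_directory>/d4/e9/f3...'.
    """
    return os.path.join(remote_working_directory, uuid[:2], uuid[2:4], uuid[4:])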
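
# ---------------------------------------------------------------------------
# Illustration only: why `provenance_exclude_list` entries are normalized with
# `os.path.normpath` before the equality test against the sandbox paths.
# ---------------------------------------------------------------------------
def _normpath_example() -> None:
    assert './path/file.txt' != 'path/file.txt'  # plain string comparison fails
    assert os.path.normpath('./path/file.txt') == os.path.normpath('path/file.txt')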