def remotedata(self):
    """Create a remote data node."""
    from aiida.orm import RemoteData

    rmd = RemoteData()
    rmd.computer = self.localhost
    rmd.set_remote_path(str(self._workdir))
    return rmd
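# Illustrative usage of the fixture above (hypothetical test method; assumes
# `self.localhost` and `self._workdir` are provided by the surrounding class):
def test_remotedata_roundtrip(self):
    rmd = self.remotedata()
    rmd.store()
    assert rmd.get_remote_path() == str(self._workdir)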
def test_is_neb(vasp_code, poscar, is_restart, is_neb):
    from aiida.orm import RemoteData
    from aiida.common.links import LinkType
    from aiida.plugins import CalculationFactory
    from aiida.engine import run_get_node
    from aiida_cusp.data import VaspPotcarData

    # define code
    vasp_code.set_attribute('input_plugin', 'cusp.vasp')
    # setup calculator
    inputs = {
        'code': vasp_code,
        'metadata': {'options': {'resources': {'num_machines': 1}}},
    }
    if is_neb:
        neb_path = {'node_00': poscar, 'node_01': poscar, 'node_02': poscar}
        inputs.update({'neb_path': neb_path})
    else:
        inputs.update({'poscar': poscar})
    VaspCalculation = CalculationFactory('cusp.vasp')
    vasp_calc_base = VaspCalculation(inputs=inputs)
    # if restart, create a second calculator using a remote_folder connected
    # to the first calculation as input
    if is_restart:
        inputs.pop('poscar', None)
        inputs.pop('neb_path', None)
        remote_data = RemoteData(computer=vasp_code.computer, remote_path='')
        remote_data.add_incoming(vasp_calc_base.node.store(),
                                 link_type=LinkType.CREATE,
                                 link_label='remote_folder')
        inputs.update({'restart': {'folder': remote_data}})
        vasp_calc_base = VaspCalculation(inputs=inputs)
    # assert is_neb() returns the desired result
    result = vasp_calc_base.is_neb()
    assert result is is_neb
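# The bare `is_restart`/`is_neb` arguments above suggest the test was
# parametrized; the original decorators are not shown here, but a plausible
# (hypothetical) reconstruction would be:
#
#     @pytest.mark.parametrize('is_restart', (True, False))
#     @pytest.mark.parametrize('is_neb', (True, False))
#     def test_is_neb(vasp_code, poscar, is_restart, is_neb):
#         ...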
def _generate_remote_data(computer, remote_path, entry_point_name=None):
    """Generate a RemoteData node, located at remote_path."""
    from aiida.common.links import LinkType
    from aiida.orm import CalcJobNode, RemoteData
    from aiida.plugins.entry_point import format_entry_point_string

    entry_point = format_entry_point_string('aiida.calculations', entry_point_name)

    remote = RemoteData(remote_path=remote_path)
    remote.computer = computer

    if entry_point_name is not None:
        creator = CalcJobNode(computer=computer, process_type=entry_point)
        creator.set_option('resources', {
            'num_machines': 1,
            'num_mpiprocs_per_machine': 1
        })
        # creator.set_attribute('prefix', 'aiida')
        remote.add_incoming(creator, link_type=LinkType.CREATE, link_label='remote_folder')
        creator.store()

    return remote
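# Illustrative call (hypothetical path and entry point): build a RemoteData
# that looks as if it were produced by a `quantumespresso.pw` calculation.
remote = _generate_remote_data(computer, '/scratch/prev_calc', 'quantumespresso.pw')
remote.store()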
class TestVerdiDataRemote(AiidaTestCase):
    """
    Testing verdi data remote
    """

    @classmethod
    def setUpClass(cls):
        super(TestVerdiDataRemote, cls).setUpClass()
        user = orm.User.objects.get_default()
        orm.AuthInfo(cls.computer, user).store()

    def setUp(self):
        comp = self.computer
        self.r = RemoteData()
        p = tempfile.mkdtemp()
        self.r.set_remote_path(p)
        with io.open(p + '/file.txt', 'w', encoding='utf8') as fhandle:
            fhandle.write(u'test string')
        self.r.computer = comp
        self.r.store()
        self.cli_runner = CliRunner()

    def test_remoteshowhelp(self):
        output = sp.check_output(['verdi', 'data', 'remote', 'show', '--help'])
        self.assertIn(b'Usage:', output, 'Sub-command verdi data remote show --help failed.')

    def test_remoteshow(self):
        options = [str(self.r.id)]
        res = self.cli_runner.invoke(cmd_remote.remote_show, options, catch_exceptions=False)
        self.assertEqual(res.exit_code, 0, 'The command verdi data remote show did not finish correctly')
        self.assertIn(b'Remote computer name:', res.stdout_bytes,
                      'The string "Remote computer name:" was not found in the'
                      ' output of verdi data remote show')
        self.assertIn(b'Remote folder full path:', res.stdout_bytes,
                      'The string "Remote folder full path:" was not found in the'
                      ' output of verdi data remote show')

    def test_remotelshelp(self):
        output = sp.check_output(['verdi', 'data', 'remote', 'ls', '--help'])
        self.assertIn(b'Usage:', output, 'Sub-command verdi data remote ls --help failed.')

    def test_remotels(self):
        options = ['--long', str(self.r.id)]
        res = self.cli_runner.invoke(cmd_remote.remote_ls, options, catch_exceptions=False)
        self.assertEqual(res.exit_code, 0, 'The command verdi data remote ls did not finish correctly')
        self.assertIn(b'file.txt', res.stdout_bytes,
                      'The file "file.txt" was not found in the output'
                      ' of verdi data remote ls')

    def test_remotecathelp(self):
        output = sp.check_output(['verdi', 'data', 'remote', 'cat', '--help'])
        self.assertIn(b'Usage:', output, 'Sub-command verdi data remote cat --help failed.')

    def test_remotecat(self):
        options = [str(self.r.id), 'file.txt']
        res = self.cli_runner.invoke(cmd_remote.remote_cat, options, catch_exceptions=False)
        self.assertEqual(res.exit_code, 0, 'The command verdi data remote cat did not finish correctly')
        self.assertIn(b'test string', res.stdout_bytes,
                      'The string "test string" was not found in the output'
                      ' of verdi data remote cat file.txt')
def setUp(self):
    comp = self.computer
    self.rmt = RemoteData()
    path = tempfile.mkdtemp()
    self.rmt.set_remote_path(path)
    with open(os.path.join(path, 'file.txt'), 'w', encoding='utf8') as fhandle:
        fhandle.write('test string')
    self.rmt.computer = comp
    self.rmt.store()
    self.cli_runner = CliRunner()
def setUp(self): """Create a dummy RemoteData on the default computer.""" self.tmp_path = tempfile.mkdtemp() self.remote = RemoteData(computer=self.computer) self.remote.set_remote_path(self.tmp_path) with io.open(os.path.join(self.tmp_path, 'file.txt'), 'w', encoding='utf8') as fhandle: fhandle.write(u'test string') self.remote.computer = self.computer self.remote.store()
def test_neb_poscar_overwrite_switch(switch, tmpdir, vasp_code, aiida_sandbox, monkeypatch):
    import pathlib
    from aiida.orm import RemoteData
    from aiida.plugins import CalculationFactory
    from aiida_cusp.data import VaspPotcarData

    # set the input plugin for code
    vasp_code.set_attribute('input_plugin', 'cusp.vasp')
    # setup a remote restart directory with POSCAR and CONTCAR
    computer = vasp_code.computer
    subfolders = ['00', '01', '02']
    for subfolder in subfolders:
        pathlib.Path(tmpdir / subfolder).mkdir()
        pathlib.Path(tmpdir / subfolder / 'POSCAR').touch()
        pathlib.Path(tmpdir / subfolder / 'CONTCAR').touch()
    remote_path = str(tmpdir)
    remote_data = RemoteData(computer=computer, remote_path=remote_path)
    inputs = {
        'code': vasp_code,
        'restart': {'folder': remote_data, 'contcar_to_poscar': switch},
        'metadata': {'options': {'resources': {'num_machines': 1}}},
    }
    VaspCalculation = CalculationFactory('cusp.vasp')
    # mock the is_neb() method to avoid the search of the remote_folder's
    # parent CalcJobNode (we know it **is** a NEB calculation!)
    monkeypatch.setattr(VaspCalculation, 'is_neb', lambda self: True)
    vasp_neb_calculation = VaspCalculation(inputs=inputs)
    calcinfo = vasp_neb_calculation.prepare_for_submission(aiida_sandbox)
    remote_copy_list = calcinfo.remote_copy_list
    for subfolder in subfolders:
        # collect the remote_copy_list entries for a specific NEB subfolder
        reduced_remote_list = []
        for (uuid, abspath_remote, relpath_input) in remote_copy_list:
            if pathlib.Path(abspath_remote).parent.name == subfolder:
                reduced_remote_list.append((abspath_remote, relpath_input))
        copied_files = [pathlib.Path(f).name for (f, _) in reduced_remote_list]
        # the CONTCAR file is always copied, no matter whether the switch is
        # set or not
        assert 'CONTCAR' in copied_files
        # now check for a single NEB subfolder whether CONTCAR is copied onto
        # itself or onto the new POSCAR
        for (abspath_remote, relpath_input) in reduced_remote_list:
            filename_remote = pathlib.Path(abspath_remote).name
            filename_input = pathlib.Path(relpath_input).name
            if filename_remote == 'CONTCAR':
                if switch:  # True: CONTCAR --> POSCAR
                    assert filename_input == 'POSCAR'
                    assert 'POSCAR' not in copied_files
                else:  # False: CONTCAR --> CONTCAR
                    assert filename_input == 'CONTCAR'
                    assert 'POSCAR' in copied_files
def fixture_remotedata(fixture_localhost, shared_datadir):
    """
    Return a `RemoteData` with contents from the specified directory. Optionally,
    a mapping of strings to replace in the filenames can be passed. Note that the
    order of replacement is not guaranteed.

    The RemoteData node is yielded and points to a folder in /tmp, which is
    removed at the end.
    """
    from aiida.orm import RemoteData
    from aiida.common.folders import SandboxFolder

    replacement_mapping = {'gaas': 'aiida'}
    # TODO: Remove cast to 'str' when Python2 support is dropped.
    dir_path = str(shared_datadir / 'gaas')

    # TODO: replace with tempfile.TemporaryDirectory when Python2 support is
    # dropped. Note that some things will change, e.g. sandbox.abspath
    # becomes tempdir.name, and similarly `insert_path` needs to be changed.
    with SandboxFolder() as sandbox:
        remote = RemoteData(remote_path=sandbox.abspath, computer=fixture_localhost)
        for file_path in os.listdir(dir_path):
            abs_path = os.path.abspath(os.path.join(dir_path, file_path))
            res_file_path = file_path
            for old, new in replacement_mapping.items():
                res_file_path = res_file_path.replace(old, new)
            # put using the correct method
            sandbox.insert_path(abs_path, res_file_path)
        yield remote
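# Illustrative use (assuming the generator above is registered with
# @pytest.fixture): the yielded node can be stored like any other RemoteData;
# the sandbox folder is cleaned up once the fixture's context exits.
def test_gaas_remote(fixture_remotedata):
    fixture_remotedata.store()
    assert fixture_remotedata.get_remote_path()  # points at the folder in /tmp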
def test_calcjob_submit_mgo(db_test_app):
    # type: (AiidaTestApp, bool) -> None
    """Test submitting a calculation."""
    parameters = Dict(dict={"shrink_is": 18, "shrink_isp": 36, "npoints": 20})
    metadata = {
        "options": {
            "withmpi": False,
            "resources": {
                "num_machines": 1,
                "num_mpiprocs_per_machine": 1,
            },
            "max_wallclock_seconds": 30,
            "input_wf_name": "fort.9",
        },
        "dry_run": True,
    }

    # set up calculation
    builder = db_test_app.get_or_create_code("crystal17.ech3").get_builder()
    builder.metadata = metadata
    builder.parameters = parameters

    with resource_context("ech3", "mgo_sto3g_scf") as path:
        builder.wf_folder = RemoteData(
            remote_path=str(path),
            computer=db_test_app.get_or_create_computer())
        process_options = builder.process_class(inputs=builder).metadata.options

        with db_test_app.sandbox_folder() as folder:
            calc_info = db_test_app.generate_calcinfo("crystal17.ech3", folder, builder)

            # Check the attributes of the returned `CalcInfo`
            assert calc_info.codes_info[0].cmdline_params == []
            assert sorted(calc_info.local_copy_list) == sorted([])
            assert sorted(calc_info.retrieve_list) == sorted(["main.out"])
            assert sorted(calc_info.retrieve_temporary_list) == sorted(
                ["DENS_CUBE.DAT", "SPIN_CUBE.DAT"])

            assert sorted(folder.get_content_list()) == sorted(
                [process_options.input_file_name])

            with folder.open(process_options.input_file_name) as f:
                input_content = f.read()

    expected_input = dedent("""\
        ECH3
        20
        END""")
    assert input_content == expected_input
def test_remote_folder_filelist(vasp_code, filename, relpath, aiida_sandbox):
    import pathlib
    from aiida.orm import RemoteData
    from aiida.orm import load_computer
    from aiida_cusp.calculators.calculation_base import CalculationBase

    sandbox = pathlib.Path(aiida_sandbox.abspath).absolute()
    # create file with the given name at the given subdir
    filepath = (sandbox / relpath).absolute()
    if not relpath == '.':
        filepath.mkdir(parents=True)
    filepath = filepath / filename
    filepath.touch()
    assert filepath.exists() is True
    # initialize remote directory from the sandbox folder
    remote = RemoteData(computer=vasp_code.computer, remote_path=str(sandbox))
    # setup the calculator
    inputs = {
        'code': vasp_code,
        'restart': {'folder': remote},
        'metadata': {'options': {'resources': {'num_machines': 1}}},
    }
    Base = CalculationBase(inputs=inputs)
    remote_filelist = Base.remote_filelist(remote)
    expected_filelist = [(
        filename,       # the file name
        str(filepath),  # the absolute path including the file's name
        str(relpath),   # the relative path without the file
    )]
    assert remote_filelist == expected_filelist
def test_run_mgo_scf(db_test_app, sanitise_calc_attr, data_regression):
    # type: (AiidaTestApp) -> None
    """Test running a calculation."""
    parameters = Dict(
        dict={
            "k_points": [18, 36],
            "npoints": 100,
            "band_minimum": -10,
            "band_maximum": 10,
            "band_units": "eV",
        })
    metadata = {
        "options": {
            "withmpi": False,
            "resources": {
                "num_machines": 1,
                "num_mpiprocs_per_machine": 1,
            },
            "max_wallclock_seconds": 30,
            "input_wf_name": "fort.9",
        }
    }

    # set up calculation
    builder = db_test_app.get_or_create_code("crystal17.doss").get_builder()
    builder.metadata = metadata
    builder.parameters = parameters

    with resource_context("doss", "mgo_sto3g_scf") as path:
        builder.wf_folder = RemoteData(
            remote_path=str(path),
            computer=db_test_app.get_or_create_computer())
        output = run_get_node(builder)
        calc_node = output.node

    db_test_app.check_calculation(calc_node, ["results", "arrays"])

    calc_attributes = sanitise_calc_attr(calc_node.attributes)

    results = {
        k: round(i, 7) if isinstance(i, float) else i
        for k, i in calc_node.outputs.results.attributes.items()
        if k not in ["execution_time_seconds"]
    }

    data_regression.check({
        "calc": calc_attributes,
        "results": results,
        "arrays": calc_node.outputs.arrays.attributes,
    })
def _generate_remote_data(computer, remote_path, entry_point_name=None, extras_root=()):
    """Generate a RemoteData node located at remote_path, optionally with a creator CalcJobNode."""
    from aiida.common.links import LinkType
    from aiida.orm import CalcJobNode, RemoteData, Dict
    from aiida.plugins.entry_point import format_entry_point_string

    entry_point = format_entry_point_string('aiida.calculations', entry_point_name)

    remote = RemoteData(remote_path=remote_path)
    remote.computer = computer

    if entry_point_name is not None:
        creator = CalcJobNode(computer=computer, process_type=entry_point)
        creator.set_option('resources', {
            'num_machines': 1,
            'num_mpiprocs_per_machine': 1
        })
        remote.add_incoming(creator, link_type=LinkType.CREATE, link_label='remote_folder')

        for extra in extras_root:
            to_link = extra[0]
            if isinstance(to_link, dict):
                to_link = Dict(dict=to_link)
            to_link.store()
            creator.add_incoming(to_link, link_type=LinkType.INPUT_CALC, link_label=extra[1])

        creator.store()

    return remote
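# Illustrative call (hypothetical values): `extras_root` attaches extra input
# nodes to the creator CalcJobNode, e.g. a parameters Dict under the link
# label 'parameters', so code walking the provenance graph can find it.
remote = _generate_remote_data(
    computer,
    '/scratch/prev_calc',
    entry_point_name='quantumespresso.pw',
    extras_root=[({'CONTROL': {'calculation': 'scf'}}, 'parameters')],
)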
def test_defined_inputs_are_preferred(use_incar, use_kpoints, tmpdir, vasp_code,
                                      aiida_sandbox, incar, kpoints, monkeypatch):
    import pathlib
    from aiida.orm import RemoteData
    from aiida.plugins import CalculationFactory
    from aiida_cusp.data import VaspPotcarData

    # set the input plugin for code
    vasp_code.set_attribute('input_plugin', 'cusp.vasp')
    # setup a remote restart directory containing INCAR and KPOINTS
    computer = vasp_code.computer
    pathlib.Path(tmpdir / 'INCAR').touch()
    pathlib.Path(tmpdir / 'KPOINTS').touch()
    remote_path = str(tmpdir)
    remote_data = RemoteData(computer=computer, remote_path=remote_path)
    inputs = {
        'code': vasp_code,
        'restart': {'folder': remote_data},
        'metadata': {'options': {'resources': {'num_machines': 1}}},
    }
    if use_incar:
        inputs.update({'incar': incar})
    if use_kpoints:
        inputs.update({'kpoints': kpoints})
    VaspCalculation = CalculationFactory('cusp.vasp')
    # mock the is_neb() method to avoid the search of the remote_folder's
    # parent CalcJobNode (we know it's **not** a NEB calculation!)
    monkeypatch.setattr(VaspCalculation, 'is_neb', lambda self: False)
    vasp_basic_calculation = VaspCalculation(inputs=inputs)
    calcinfo = vasp_basic_calculation.prepare_for_submission(aiida_sandbox)
    remote_copy_list = calcinfo.remote_copy_list
    files_to_copy = [pathlib.Path(f).name for (_, f, _) in remote_copy_list]
    if use_incar:  # incar input defined: do not copy
        assert 'INCAR' not in files_to_copy
    else:
        assert 'INCAR' in files_to_copy
    if use_kpoints:  # kpoints input defined: do not copy
        assert 'KPOINTS' not in files_to_copy
    else:
        assert 'KPOINTS' in files_to_copy
def test_create_builder(db_test_app, data_regression):
    with SandboxFolder() as folder:
        with open_resource_binary("crystal", "nio_sto3g_afm_scf", "INPUT") as handle:
            folder.create_file_from_filelike(handle, "INPUT", mode="wb")
        with open_resource_binary("crystal", "nio_sto3g_afm_scf", "main.out") as handle:
            folder.create_file_from_filelike(handle, "main.out", mode="wb")
        remote = RemoteData(
            remote_path=folder.abspath,
            computer=db_test_app.get_or_create_computer()
        )
        builder = populate_builder(remote)

    assert set(builder["basissets"].keys()) == set(["Ni", "O"])

    data_regression.check(builder.parameters.attributes, "test_create_builder_params")

    expected_settings = {
        "kinds": {
            "spin_alpha": ["Ni"],
            "spin_beta": ["Ni1"],
        },
        "operations": [
            [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
            [-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0],
            [0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
            [0.0, 1.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0],
            [-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0],
            [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0],
            [0.0, 1.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0],
            [0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0],
            [1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0],
            [-1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0],
            [0.0, -1.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0],
            [0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0],
            [-1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0],
            [1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
            [0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0],
            [0.0, -1.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
        ],
        "space_group": 1,
        "crystal_type": 1,
        "centring_code": 1,
    }
    assert builder.symmetry.compare_operations(expected_settings["operations"]) == {}
def test_run_mgo_scf(db_test_app, sanitise_calc_attr, data_regression):
    # type: (AiidaTestApp) -> None
    """Test running a calculation."""
    parameters = Dict(dict={
        "npoints": 20,
    })
    metadata = {
        "options": {
            "withmpi": False,
            "resources": {
                "num_machines": 1,
                "num_mpiprocs_per_machine": 1,
            },
            "max_wallclock_seconds": 30,
            "input_wf_name": "fort.9",
        }
    }

    # set up calculation
    builder = db_test_app.get_or_create_code("crystal17.ech3").get_builder()
    builder.metadata = metadata
    builder.parameters = parameters

    with resource_context("ech3", "mgo_sto3g_scf") as path:
        builder.wf_folder = RemoteData(
            remote_path=str(path),
            computer=db_test_app.get_or_create_computer())
        output = run_get_node(builder)
        calc_node = output.node

    db_test_app.check_calculation(calc_node, ["results", "charge"])

    calc_attributes = sanitise_calc_attr(calc_node.attributes)

    results = recursive_round(calc_node.outputs.results.attributes, 5)
    results.pop("execution_time_seconds", None)
    results.pop("parser_version", None)

    data_regression.check({
        "calc": calc_attributes,
        "results": results,
        "charge": recursive_round(calc_node.outputs.charge.attributes, 5),
    })
def test_full_nio_afm(db_test_app, data_regression):
    code = db_test_app.get_or_create_code("crystal17.main")

    metadata = get_default_metadata()
    metadata["options"].update(
        {"input_file_name": "other.d12", "output_main_file_name": "other2.out"}
    )

    with SandboxFolder() as folder:
        with open_resource_binary("crystal", "nio_sto3g_afm_scf", "INPUT") as handle:
            folder.create_file_from_filelike(handle, "other.d12", mode="wb")
        with open_resource_binary("crystal", "nio_sto3g_afm_scf", "main.out") as handle:
            folder.create_file_from_filelike(handle, "other2.out", mode="wb")
        remote = RemoteData(
            remote_path=folder.abspath,
            computer=db_test_app.get_or_create_computer()
        )
        builder = populate_builder(remote, code=code, metadata=metadata)
        node = immigrate_existing(builder, remote)

    attributes = node.attributes
    attributes["remote_workdir"] = "path/to/remote"
    attributes["version"] = None
    data_regression.check(attributes)

    assert set(node.inputs) == set(
        [
            "basissets__Ni",
            "basissets__O",
            "parameters",
            "structure",
            "symmetry",
            "kinds",
            "code",
        ]
    )
    assert set(node.outputs) == set(["results", "remote_folder", "retrieved"])
def test_calcjob_submit_mgo_remote(db_test_app):
    # type: (AiidaTestApp, bool) -> None
    """Test submitting a calculation, using a remote folder input for wf_input."""
    builder = db_test_app.get_or_create_code("crystal17.newk").get_builder()
    builder.metadata = get_metadata()
    builder.parameters = Dict(dict={"k_points": [18, 36]})

    with resource_context("newk", "mgo_sto3g_scf", "fort.9") as path:
        builder.wf_folder = RemoteData(
            remote_path=str(path),
            computer=db_test_app.get_or_create_computer())
        process_options = builder.process_class(inputs=builder).metadata.options

        with db_test_app.sandbox_folder() as folder:
            db_test_app.generate_calcinfo("crystal17.newk", folder, builder)
            assert folder.get_content_list() == [process_options.input_file_name]
class TestRemoteData(AiidaTestCase):
    """Test for the RemoteData class."""

    @classmethod
    def setUpClass(cls):
        super(TestRemoteData, cls).setUpClass()
        user = User.objects.get_default()
        authinfo = AuthInfo(cls.computer, user)
        authinfo.store()

    def setUp(self):
        """Create a dummy RemoteData on the default computer."""
        self.tmp_path = tempfile.mkdtemp()
        self.remote = RemoteData(computer=self.computer)
        self.remote.set_remote_path(self.tmp_path)
        with io.open(os.path.join(self.tmp_path, 'file.txt'), 'w', encoding='utf8') as fhandle:
            fhandle.write(u'test string')
        self.remote.computer = self.computer
        self.remote.store()

    def tearDown(self):
        """Delete the temporary path for the dummy RemoteData node."""
        try:
            shutil.rmtree(self.tmp_path)
        except OSError as exception:
            if exception.errno == errno.ENOENT:
                pass
            elif exception.errno == errno.ENOTDIR:
                os.remove(self.tmp_path)
            else:
                raise IOError(exception)

    def test_clean(self):
        """Try cleaning a RemoteData node."""
        self.assertFalse(self.remote.is_empty)
        self.remote._clean()
        self.assertTrue(self.remote.is_empty)
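# Related RemoteData conveniences (illustrative; these open a transport to the
# node's computer, which is why setUpClass stores an AuthInfo first):
#
#     self.remote.listdir()                        # ['file.txt']
#     self.remote.getfile('file.txt', '/tmp/copy') # fetch a single file locally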
def upload_calculation(node, transport, calc_info, script_filename, dry_run=False):
    """Upload a `CalcJob` instance

    :param node: the `CalcJobNode`.
    :param transport: an already opened transport to use to submit the calculation.
    :param calc_info: the calculation info datastructure returned by `CalcJobNode.presubmit`
    :param script_filename: the job launch script returned by `CalcJobNode.presubmit`
    :return: tuple of ``calc_info`` and ``script_filename``
    """
    from logging import LoggerAdapter
    from tempfile import NamedTemporaryFile
    from aiida.orm import load_node, Code, RemoteData

    # If the calculation already has a `remote_folder`, simply return. The upload was apparently already completed
    # before, which can happen if the daemon is restarted and it shuts down after uploading but before getting the
    # chance to perform the state transition. Upon reloading this calculation, it will re-attempt the upload.
    link_label = 'remote_folder'
    if node.get_outgoing(RemoteData, link_label_filter=link_label).first():
        execlogger.warning('CalcJobNode<{}> already has a `{}` output: skipping upload'.format(node.pk, link_label))
        return calc_info, script_filename

    computer = node.computer

    codes_info = calc_info.codes_info
    input_codes = [load_node(_.code_uuid, sub_classes=(Code,)) for _ in codes_info]

    logger_extra = get_dblogger_extra(node)
    transport.set_logger_extra(logger_extra)
    logger = LoggerAdapter(logger=execlogger, extra=logger_extra)

    if not dry_run and node.has_cached_links():
        raise ValueError(
            'Cannot submit calculation {} because it has cached input links! If you just want to test the '
            'submission, set `metadata.dry_run` to True in the inputs.'.format(node.pk))

    folder = node._raw_input_folder

    # If we are performing a dry-run, the working directory should actually be a local folder that should already exist
    if dry_run:
        workdir = transport.getcwd()
    else:
        remote_user = transport.whoami()
        # TODO Doc: {username} field
        # TODO: if something is changed here, fix also 'verdi computer test'
        remote_working_directory = computer.get_workdir().format(username=remote_user)
        if not remote_working_directory.strip():
            raise exceptions.ConfigurationError(
                "[submission of calculation {}] No remote_working_directory configured for computer '{}'".format(
                    node.pk, computer.name))

        # If it already exists, no exception is raised
        try:
            transport.chdir(remote_working_directory)
        except IOError:
            logger.debug(
                '[submission of calculation {}] Unable to chdir in {}, trying to create it'.format(
                    node.pk, remote_working_directory))
            try:
                transport.makedirs(remote_working_directory)
                transport.chdir(remote_working_directory)
            except EnvironmentError as exc:
                raise exceptions.ConfigurationError(
                    '[submission of calculation {}] '
                    'Unable to create the remote directory {} on '
                    "computer '{}': {}".format(node.pk, remote_working_directory, computer.name, exc))
        # Store remotely with sharding (here is where we choose
        # the folder structure of remote jobs; then I store this
        # in the calculation properties using _set_remote_dir
        # and I do not have to know the logic, but I just need to
        # read the absolute path from the calculation properties.
        transport.mkdir(calc_info.uuid[:2], ignore_existing=True)
        transport.chdir(calc_info.uuid[:2])
        transport.mkdir(calc_info.uuid[2:4], ignore_existing=True)
        transport.chdir(calc_info.uuid[2:4])

        try:
            # The final directory may already exist, most likely because this function was already executed once,
            # but failed and as a result was rescheduled by the engine. In this case it would be fine to delete the
            # folder and create it from scratch, except that we cannot be sure that this is the actual case.
            # Therefore, to err on the safe side, we move the folder to the lost+found directory before recreating
            # the folder from scratch
            transport.mkdir(calc_info.uuid[4:])
        except OSError:
            # Move the existing directory to lost+found, log a warning and create a clean directory anyway
            path_existing = os.path.join(transport.getcwd(), calc_info.uuid[4:])
            path_lost_found = os.path.join(remote_working_directory, REMOTE_WORK_DIRECTORY_LOST_FOUND)
            path_target = os.path.join(path_lost_found, calc_info.uuid)
            logger.warning(
                'tried to create path {} but it already exists, moving the entire folder to {}'.format(
                    path_existing, path_target))

            # Make sure the lost+found directory exists, then copy the existing folder there and delete the original
            transport.mkdir(path_lost_found, ignore_existing=True)
            transport.copytree(path_existing, path_target)
            transport.rmtree(path_existing)

            # Now we can create a clean folder for this calculation
            transport.mkdir(calc_info.uuid[4:])
        finally:
            transport.chdir(calc_info.uuid[4:])

        # I store the workdir of the calculation for later file retrieval
        workdir = transport.getcwd()
        node.set_remote_workdir(workdir)

    # I first create the code files, so that the code can put
    # default files to be overwritten by the plugin itself.
    # Still, beware! The code file itself could be overwritten...
    # But I checked for this earlier.
    for code in input_codes:
        if code.is_local():
            # Note: this will possibly overwrite files
            for f in code.get_folder_list():
                transport.put(code.get_abs_path(f), f)
            transport.chmod(code.get_local_executable(), 0o755)  # rwxr-xr-x

    # In a dry_run, the working directory is the raw input folder, which will already contain these resources
    if not dry_run:
        for filename in folder.get_content_list():
            logger.debug('[submission of calculation {}] copying file/folder {}...'.format(node.pk, filename))
            transport.put(folder.get_abs_path(filename), filename)

    # local_copy_list is a list of tuples, each with (uuid, dest_rel_path)
    # NOTE: validation of these lists are done inside calculation.presubmit()
    local_copy_list = calc_info.local_copy_list or []
    remote_copy_list = calc_info.remote_copy_list or []
    remote_symlink_list = calc_info.remote_symlink_list or []

    for uuid, filename, target in local_copy_list:
        logger.debug('[submission of calculation {}] copying local file/folder to {}'.format(node.pk, target))

        try:
            data_node = load_node(uuid=uuid)
        except exceptions.NotExistent:
            logger.warning('failed to load Node<{}> specified in the `local_copy_list`'.format(uuid))
            # nothing can be copied if the node could not be loaded
            continue

        # Note, once #2579 is implemented, use the `node.open` method instead of the named temporary file in
        # combination with the new `Transport.put_object_from_filelike`
        # Since the content of the node could potentially be binary, we read the raw bytes and pass them on
        with NamedTemporaryFile(mode='wb+') as handle:
            handle.write(data_node.get_object_content(filename, mode='rb'))
            handle.flush()
            handle.seek(0)
            transport.put(handle.name, target)

    if dry_run:
        if remote_copy_list:
            with open(os.path.join(workdir, '_aiida_remote_copy_list.txt'), 'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_copy_list:
                    handle.write('would have copied {} to {} in working directory on remote {}'.format(
                        remote_abs_path, dest_rel_path, computer.name))

        if remote_symlink_list:
            with open(os.path.join(workdir, '_aiida_remote_symlink_list.txt'), 'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_symlink_list:
                    handle.write('would have created symlinks from {} to {} in working directory on remote {}'.format(
                        remote_abs_path, dest_rel_path, computer.name))

    else:
        for (remote_computer_uuid, remote_abs_path, dest_rel_path) in remote_copy_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug('[submission of calculation {}] copying {} remotely, directly on the machine {}'.format(
                    node.pk, dest_rel_path, computer.name))
                try:
                    transport.copy(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning('[submission of calculation {}] Unable to copy remote resource from {} to {}! '
                                   'Stopping.'.format(node.pk, remote_abs_path, dest_rel_path))
                    raise
            else:
                raise NotImplementedError(
                    '[submission of calculation {}] Remote copy between two different machines is '
                    'not implemented yet'.format(node.pk))

        for (remote_computer_uuid, remote_abs_path, dest_rel_path) in remote_symlink_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug('[submission of calculation {}] copying {} remotely, directly on the machine {}'.format(
                    node.pk, dest_rel_path, computer.name))
                try:
                    transport.symlink(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning('[submission of calculation {}] Unable to create remote symlink from {} to {}! '
                                   'Stopping.'.format(node.pk, remote_abs_path, dest_rel_path))
                    raise
            else:
                raise IOError('It is not possible to create a symlink between two different machines for '
                              'calculation {}'.format(node.pk))

    if not dry_run:
        # Make sure that attaching the `remote_folder` with a link is the last thing we do. This gives the biggest
        # chance of making this method idempotent. That is to say, if a runner gets interrupted during this action, it
        # will simply retry the upload, unless we got here and managed to link it up, in which case we move to the next
        # task. Because in that case, the check for the existence of this link at the top of this function will exit
        # early from this command.
        remotedata = RemoteData(computer=computer, remote_path=workdir)
        remotedata.add_incoming(node, link_type=LinkType.CREATE, link_label='remote_folder')
        remotedata.store()

    return calc_info, script_filename
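# For reference (matching the loops above): entries of `remote_copy_list` and
# `remote_symlink_list` are 3-tuples of the form
#
#     (remote_computer_uuid, '/abs/path/on/remote/CHGCAR', 'CHGCAR')
#
# i.e. the computer holding the source, the absolute source path, and the
# destination path relative to the new working directory.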
def test_calculation_restart_copy_remote(vasp_code, cstdn_code, tmpdir, testfile,
                                         from_remote, monkeypatch):
    import pathlib
    import shutil
    from aiida.orm import RemoteData
    from aiida.engine import run_get_node
    from aiida_cusp.utils.defaults import PluginDefaults
    from aiida_cusp.calculators.calculation_base import CalculationBase

    # set the input plugin for code
    vasp_code.set_attribute('input_plugin', 'cusp.vasp')
    cstdn_code.set_attribute('input_plugin', 'cusp.vasp')
    # configure computer
    computer = vasp_code.computer
    # create a clean workdir used by the computer
    workdir = pathlib.Path(tmpdir) / 'workdir'
    if workdir.exists():
        shutil.rmtree(workdir)
    workdir.mkdir(parents=True)
    computer.set_workdir(str(workdir.absolute()))
    # create a clean remote dir and populate it with the testfile
    remote_path = pathlib.Path(tmpdir) / 'remote_dir'
    if remote_path.exists():
        shutil.rmtree(remote_path)
    remote_path.mkdir(parents=True)
    # full path to the file on the remote (also create any subdirs inside
    # the remote folder if necessary)
    fpath = remote_path / testfile
    if not fpath.parent.exists():
        fpath.parent.mkdir(parents=True)
    # write some unique content to the file which allows it to be
    # identified as a file copied in from the remote
    remote_content = "{} remote file of parent calculation".format(fpath.name)
    with open(fpath, 'w') as remote_file:
        remote_file.write(remote_content)
    remote_data = RemoteData(computer=computer, remote_path=str(remote_path))
    # connect the created remote folder to the calculation to simulate a
    # restarted calculation
    inputs = {
        'code': vasp_code,
        'custodian': {'code': cstdn_code},
        'restart': {'folder': remote_data},
        'metadata': {'options': {'resources': {'num_machines': 1}}},
    }

    # mock the central create_calculation_inputs() method which is defined
    # on the corresponding subclasses. here we simply replace it with a
    # call to the restart_copy_remote() method (without any checks).
    # additionally the custodian spec file is written to check that it does
    # not get accidentally copied to the working directory
    def mock(self, folder, calcinfo):
        self.restart_copy_remote(folder, calcinfo)
        spec_fname = folder.get_abs_path(PluginDefaults.CSTDN_SPEC_FNAME)
        pathlib.Path(spec_fname).touch()
        return calcinfo

    monkeypatch.setattr(CalculationBase, 'create_calculation_inputs', mock)
    # actually submit the calculation to check that remote contents are
    # indeed copied to the working directory
    calc_node = run_get_node(CalculationBase, **inputs)
    # inspect working directory files
    calc_workdir = pathlib.Path(calc_node.node.get_remote_workdir())
    calc_file_name = calc_workdir / testfile
    with open(calc_file_name, 'r') as calc_input_file:
        calc_file_content = calc_input_file.read()
    if from_remote:
        assert calc_file_content == remote_content
    else:
        assert calc_file_content != remote_content
def test_bands_wc(fresh_aiida_env, potentials, mock_vasp):
    """Test with mocked vasp code."""
    from aiida.orm import Code, Log, RemoteData
    from aiida.plugins import WorkflowFactory
    from aiida.engine import run

    workchain = WorkflowFactory('vasp.bands')

    mock_vasp.store()
    create_authinfo(computer=mock_vasp.computer, store=True)

    structure = PoscarParser(file_path=data_path('test_bands_wc', 'inp', 'POSCAR')).structure
    parameters = IncarParser(file_path=data_path('test_bands_wc', 'inp', 'INCAR')).incar
    parameters['system'] = 'test-case:test_bands_wc'
    # Make sure we replace encut with pwcutoff
    del parameters['encut']
    parameters = {'vasp': parameters}
    parameters['electronic'] = {'pwcutoff': 200}

    inputs = AttributeDict()
    inputs.code = Code.get_from_string('mock-vasp@localhost')
    inputs.structure = structure
    inputs.parameters = get_data_node('dict', dict=parameters)
    inputs.potential_family = get_data_node('str', POTCAR_FAMILY_NAME)
    inputs.potential_mapping = get_data_node('dict', dict=POTCAR_MAP)
    inputs.options = get_data_node('dict',
                                   dict={
                                       'withmpi': False,
                                       'queue_name': 'None',
                                       'resources': {
                                           'num_machines': 1,
                                           'num_mpiprocs_per_machine': 1
                                       },
                                       'max_wallclock_seconds': 3600
                                   })
    inputs.max_iterations = get_data_node('int', 1)
    inputs.clean_workdir = get_data_node('bool', False)
    inputs.verbose = get_data_node('bool', True)
    # Also set the restart folder as we assume a bands data will start from
    # a previous calculation that is sitting in the restart folder
    inputs.restart_folder = RemoteData(computer=inputs.code.computer,
                                       remote_path=data_path('test_bands_wc', 'inp'))
    results, node = run.get_node(workchain, **inputs)
    assert node.exit_status == 0
    assert 'bands' in results
    kpoints = results['bands'].get_kpoints()
    test_array = np.array([[0., 0., 0.], [0.02272727, 0., 0.02272727], [0.04545454, 0., 0.04545454],
                           [0.06818182, 0., 0.06818182], [0.09090909, 0., 0.09090909],
                           [0.11363636, 0., 0.11363636], [0.13636364, 0., 0.13636364],
                           [0.15909091, 0., 0.15909091], [0.18181818, 0., 0.18181818],
                           [0.20454545, 0., 0.20454545], [0.22727273, 0., 0.22727273],
                           [0.25, 0., 0.25], [0.27272727, 0., 0.27272727],
                           [0.29545455, 0., 0.29545455], [0.31818182, 0., 0.31818182],
                           [0.34090909, 0., 0.34090909], [0.36363636, 0., 0.36363636],
                           [0.38636364, 0., 0.38636364], [0.40909091, 0., 0.40909091],
                           [0.43181818, 0., 0.43181818], [0.45454545, 0., 0.45454545],
                           [0.47727273, 0., 0.47727273], [0.5, 0., 0.5],
                           [0.51785714, 0.03571429, 0.51785714], [0.53571429, 0.07142857, 0.53571429],
                           [0.55357143, 0.10714286, 0.55357143], [0.57142857, 0.14285714, 0.57142857],
                           [0.58928571, 0.17857143, 0.58928571], [0.60714286, 0.21428571, 0.60714286],
                           [0.625, 0.25, 0.625], [0.375, 0.375, 0.75],
                           [0.35869565, 0.35869565, 0.7173913], [0.3423913, 0.3423913, 0.68478261],
                           [0.32608696, 0.32608696, 0.65217391], [0.30978261, 0.30978261, 0.61956522],
                           [0.29347826, 0.29347826, 0.58695652], [0.27717391, 0.27717391, 0.55434783],
                           [0.26086957, 0.26086957, 0.52173913], [0.24456522, 0.24456522, 0.48913043],
                           [0.22826087, 0.22826087, 0.45652174], [0.21195652, 0.21195652, 0.42391304],
                           [0.19565217, 0.19565217, 0.39130435], [0.17934783, 0.17934783, 0.35869565],
                           [0.16304348, 0.16304348, 0.32608696], [0.14673913, 0.14673913, 0.29347826],
                           [0.13043478, 0.13043478, 0.26086957], [0.11413044, 0.11413044, 0.22826087],
                           [0.09782609, 0.09782609, 0.19565217], [0.08152174, 0.08152174, 0.16304348],
                           [0.06521739, 0.06521739, 0.13043478], [0.04891304, 0.04891304, 0.09782609],
                           [0.0326087, 0.0326087, 0.06521739], [0.01630435, 0.01630435, 0.0326087],
                           [0., 0., 0.], [0.02631579, 0.02631579, 0.02631579],
                           [0.05263158, 0.05263158, 0.05263158], [0.07894737, 0.07894737, 0.07894737],
                           [0.10526316, 0.10526316, 0.10526316], [0.13157895, 0.13157895, 0.13157895],
                           [0.15789474, 0.15789474, 0.15789474], [0.18421053, 0.18421053, 0.18421053],
                           [0.21052632, 0.21052632, 0.21052632], [0.2368421, 0.2368421, 0.2368421],
                           [0.26315789, 0.26315789, 0.26315789], [0.28947368, 0.28947368, 0.28947368],
                           [0.31578947, 0.31578947, 0.31578947], [0.34210526, 0.34210526, 0.34210526],
                           [0.36842105, 0.36842105, 0.36842105], [0.39473684, 0.39473684, 0.39473684],
                           [0.42105263, 0.42105263, 0.42105263], [0.44736842, 0.44736842, 0.44736842],
                           [0.47368421, 0.47368421, 0.47368421], [0.5, 0.5, 0.5],
                           [0.5, 0.48333333, 0.51666667], [0.5, 0.46666667, 0.53333333],
                           [0.5, 0.45, 0.55], [0.5, 0.43333333, 0.56666667],
                           [0.5, 0.41666667, 0.58333333], [0.5, 0.4, 0.6],
                           [0.5, 0.38333333, 0.61666667], [0.5, 0.36666667, 0.63333333],
                           [0.5, 0.35, 0.65], [0.5, 0.33333333, 0.66666667],
                           [0.5, 0.31666667, 0.68333333], [0.5, 0.3, 0.7],
                           [0.5, 0.28333333, 0.71666667], [0.5, 0.26666667, 0.73333333],
                           [0.5, 0.25, 0.75], [0.5, 0.225, 0.725],
                           [0.5, 0.2, 0.7], [0.5, 0.175, 0.675],
                           [0.5, 0.15, 0.65], [0.5, 0.125, 0.625],
                           [0.5, 0.1, 0.6], [0.5, 0.075, 0.575],
                           [0.5, 0.05, 0.55], [0.5, 0.025, 0.525],
                           [0.5, 0., 0.5]])
    np.testing.assert_allclose(kpoints, test_array)
    bands = results['bands'].get_bands()
    assert bands.shape == (1, 98, 20)
    np.testing.assert_allclose(bands[0, 0, 0:3], np.array([-6.0753, 6.0254, 6.0254]))
    np.testing.assert_allclose(bands[0, 2, 0:3], np.array([-6.0386, 5.7955, 5.8737]))
    np.testing.assert_allclose(bands[0, 97, 0:3], np.array([-1.867, -1.867, 3.1102]))
def test_full_mgo_opt(db_test_app, data_regression):
    code = db_test_app.get_or_create_code("crystal17.main")

    with SandboxFolder() as folder:
        with open_resource_binary("crystal", "mgo_sto3g_opt", "INPUT") as handle:
            folder.create_file_from_filelike(handle, "INPUT", mode="wb")
        with open_resource_binary("crystal", "mgo_sto3g_opt", "main.out") as handle:
            folder.create_file_from_filelike(handle, "main.out", mode="wb")
        remote = RemoteData(
            remote_path=folder.abspath,
            computer=db_test_app.get_or_create_computer()
        )
        builder = populate_builder(remote, code=code, metadata=get_default_metadata())
        node = immigrate_existing(builder, remote)

    attributes = node.attributes
    attributes["remote_workdir"] = "path/to/remote"
    attributes["version"] = None
    data_regression.check(attributes)

    assert set(node.inputs) == set(
        ["basissets__Mg", "basissets__O", "parameters", "structure", "symmetry", "code"]
    )
    assert set(node.outputs) == set(
        ["results", "retrieved", "structure", "remote_folder"]
    )

    expected_instruct_attrs = {
        "cell": [[0.0, 2.105, 2.105], [2.105, 0.0, 2.105], [2.105, 2.105, 0.0]],
        "kinds": [
            {"mass": 24.305, "name": "Mg", "symbols": ["Mg"], "weights": [1.0]},
            {"mass": 15.9994, "name": "O", "symbols": ["O"], "weights": [1.0]},
        ],
        "pbc1": True,
        "pbc2": True,
        "pbc3": True,
        "sites": [
            {"kind_name": "Mg", "position": [0.0, 0.0, 0.0]},
            {"kind_name": "O", "position": [2.105, 2.105, 2.105]},
        ],
    }
    assert (
        edict.diff(
            dict(node.inputs.structure.attributes),
            expected_instruct_attrs,
            np_allclose=True,
            atol=1e-3,
        )
        == {}
    )

    expected_outstruct_attrs = {
        "cell": [
            [0.0, 1.94218061274, 1.94218061274],
            [1.94218061274, 0.0, 1.94218061274],
            [1.94218061274, 1.94218061274, 0.0],
        ],
        "kinds": [
            {"mass": 24.305, "name": "Mg", "symbols": ["Mg"], "weights": [1.0]},
            {"mass": 15.9994, "name": "O", "symbols": ["O"], "weights": [1.0]},
        ],
        "pbc1": True,
        "pbc2": True,
        "pbc3": True,
        "sites": [
            {"kind_name": "Mg", "position": [0.0, 0.0, 0.0]},
            {
                "kind_name": "O",
                "position": [1.94218061274, 1.94218061274, 1.94218061274],
            },
        ],
    }
    assert (
        edict.diff(
            dict(node.outputs.structure.attributes),
            expected_outstruct_attrs,
            np_allclose=True,
            atol=1e-3,
        )
        == {}
    )
def generate_calcjob_node(
    self,
    entry_point_name,
    retrieved=None,
    computer_name="localhost",
    options=None,
    mark_completed=False,
    remote_path=None,
    input_nodes=None,
):
    """Fixture to generate a mock `CalcJobNode` for testing parsers.

    Parameters
    ----------
    entry_point_name : str
        entry point name of the calculation class
    retrieved : aiida.orm.FolderData
        containing the file(s) to be parsed
    computer_name : str
        used to get or create a ``Computer``, by default 'localhost'
    options : None or dict
        any additional metadata options to set on the node
    mark_completed : bool
        if True, set the process state to finished, and the exit_status = 0
    remote_path : str
        path to a folder on the computer
    input_nodes : dict
        mapping of link label to node

    Returns
    -------
    aiida.orm.CalcJobNode
        instance with the `retrieved` node linked as outgoing
    """
    from aiida.common.links import LinkType
    from aiida.engine import ExitCode, ProcessState
    from aiida.orm import CalcJobNode, Node, RemoteData
    from aiida.plugins.entry_point import format_entry_point_string

    process = self.get_calc_cls(entry_point_name)
    computer = self.get_or_create_computer(computer_name)
    entry_point = format_entry_point_string("aiida.calculations", entry_point_name)

    calc_node = CalcJobNode(computer=computer, process_type=entry_point)

    calc_node.set_options({
        k: v.default() if callable(v.default) else v.default
        for k, v in process.spec_options.items() if v.has_default()
    })
    calc_node.set_option("resources", {
        "num_machines": 1,
        "num_mpiprocs_per_machine": 1
    })
    calc_node.set_option("max_wallclock_seconds", 1800)

    if options:
        calc_node.set_options(options)

    if mark_completed:
        calc_node.set_process_state(ProcessState.FINISHED)
        calc_node.set_exit_status(ExitCode().status)

    if input_nodes is not None:
        for label, in_node in input_nodes.items():
            in_node_map = in_node
            if isinstance(in_node, Node):
                in_node_map = {None: in_node_map}
            for sublabel, in_node in in_node_map.items():
                in_node.store()
                link_label = (label if sublabel is None else
                              "{}__{}".format(label, sublabel))
                calc_node.add_incoming(in_node,
                                       link_type=LinkType.INPUT_CALC,
                                       link_label=link_label)

    calc_node.store()

    if retrieved is not None:
        retrieved.add_incoming(calc_node, link_type=LinkType.CREATE, link_label="retrieved")
        retrieved.store()

    if remote_path is not None:
        remote = RemoteData(remote_path=remote_path, computer=computer)
        remote.add_incoming(calc_node, link_type=LinkType.CREATE, link_label="remote_folder")
        remote.store()

    return calc_node
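# Illustrative usage in a parser test (hypothetical entry point, folder and
# path): build a completed mock calculation a Parser can then be run against.
calc = self.generate_calcjob_node(
    "crystal17.main",
    retrieved=retrieved_folder,       # a FolderData holding the output files
    mark_completed=True,
    remote_path="/scratch/prev_calc", # linked as `remote_folder`
)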
def upload_calculation(node, transport, calc_info, folder, inputs=None, dry_run=False):
    """Upload a `CalcJob` instance

    :param node: the `CalcJobNode`.
    :param transport: an already opened transport to use to submit the calculation.
    :param calc_info: the calculation info datastructure returned by `CalcJob.presubmit`
    :param folder: temporary local file system folder containing the inputs written by `CalcJob.prepare_for_submission`
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    from logging import LoggerAdapter
    from tempfile import NamedTemporaryFile
    from aiida.orm import load_node, Code, RemoteData

    # If the calculation already has a `remote_folder`, simply return. The upload was apparently already completed
    # before, which can happen if the daemon is restarted and it shuts down after uploading but before getting the
    # chance to perform the state transition. Upon reloading this calculation, it will re-attempt the upload.
    link_label = 'remote_folder'
    if node.get_outgoing(RemoteData, link_label_filter=link_label).first():
        execlogger.warning(f'CalcJobNode<{node.pk}> already has a `{link_label}` output: skipping upload')
        return calc_info

    computer = node.computer

    codes_info = calc_info.codes_info
    input_codes = [load_node(_.code_uuid, sub_classes=(Code,)) for _ in codes_info]

    logger_extra = get_dblogger_extra(node)
    transport.set_logger_extra(logger_extra)
    logger = LoggerAdapter(logger=execlogger, extra=logger_extra)

    if not dry_run and node.has_cached_links():
        raise ValueError(
            'Cannot submit calculation {} because it has cached input links! If you just want to test the '
            'submission, set `metadata.dry_run` to True in the inputs.'.format(node.pk))

    # If we are performing a dry-run, the working directory should actually be a local folder that should already exist
    if dry_run:
        workdir = transport.getcwd()
    else:
        remote_user = transport.whoami()
        remote_working_directory = computer.get_workdir().format(username=remote_user)
        if not remote_working_directory.strip():
            raise exceptions.ConfigurationError(
                "[submission of calculation {}] No remote_working_directory configured for computer '{}'".format(
                    node.pk, computer.label))

        # If it already exists, no exception is raised
        try:
            transport.chdir(remote_working_directory)
        except IOError:
            logger.debug(
                '[submission of calculation {}] Unable to chdir in {}, trying to create it'.format(
                    node.pk, remote_working_directory))
            try:
                transport.makedirs(remote_working_directory)
                transport.chdir(remote_working_directory)
            except EnvironmentError as exc:
                raise exceptions.ConfigurationError(
                    '[submission of calculation {}] '
                    'Unable to create the remote directory {} on '
                    "computer '{}': {}".format(node.pk, remote_working_directory, computer.label, exc))
        # Store remotely with sharding (here is where we choose
        # the folder structure of remote jobs; then I store this
        # in the calculation properties using _set_remote_dir
        # and I do not have to know the logic, but I just need to
        # read the absolute path from the calculation properties.
        transport.mkdir(calc_info.uuid[:2], ignore_existing=True)
        transport.chdir(calc_info.uuid[:2])
        transport.mkdir(calc_info.uuid[2:4], ignore_existing=True)
        transport.chdir(calc_info.uuid[2:4])

        try:
            # The final directory may already exist, most likely because this function was already executed once,
            # but failed and as a result was rescheduled by the engine. In this case it would be fine to delete the
            # folder and create it from scratch, except that we cannot be sure that this is the actual case.
            # Therefore, to err on the safe side, we move the folder to the lost+found directory before recreating
            # the folder from scratch
            transport.mkdir(calc_info.uuid[4:])
        except OSError:
            # Move the existing directory to lost+found, log a warning and create a clean directory anyway
            path_existing = os.path.join(transport.getcwd(), calc_info.uuid[4:])
            path_lost_found = os.path.join(remote_working_directory, REMOTE_WORK_DIRECTORY_LOST_FOUND)
            path_target = os.path.join(path_lost_found, calc_info.uuid)
            logger.warning(
                f'tried to create path {path_existing} but it already exists, moving the entire folder to {path_target}')

            # Make sure the lost+found directory exists, then copy the existing folder there and delete the original
            transport.mkdir(path_lost_found, ignore_existing=True)
            transport.copytree(path_existing, path_target)
            transport.rmtree(path_existing)

            # Now we can create a clean folder for this calculation
            transport.mkdir(calc_info.uuid[4:])
        finally:
            transport.chdir(calc_info.uuid[4:])

        # I store the workdir of the calculation for later file retrieval
        workdir = transport.getcwd()
        node.set_remote_workdir(workdir)

    # I first create the code files, so that the code can put
    # default files to be overwritten by the plugin itself.
    # Still, beware! The code file itself could be overwritten...
    # But I checked for this earlier.
    for code in input_codes:
        if code.is_local():
            # Note: this will possibly overwrite files
            for filename in code.list_object_names():
                # Note, once #2579 is implemented, use the `node.open` method instead of the named temporary file in
                # combination with the new `Transport.put_object_from_filelike`
                # Since the content of the node could potentially be binary, we read the raw bytes and pass them on
                with NamedTemporaryFile(mode='wb+') as handle:
                    handle.write(code.get_object_content(filename, mode='rb'))
                    handle.flush()
                    transport.put(handle.name, filename)
            transport.chmod(code.get_local_executable(), 0o755)  # rwxr-xr-x

    # local_copy_list is a list of tuples, each with (uuid, dest_rel_path)
    # NOTE: validation of these lists are done inside calculation.presubmit()
    local_copy_list = calc_info.local_copy_list or []
    remote_copy_list = calc_info.remote_copy_list or []
    remote_symlink_list = calc_info.remote_symlink_list or []
    provenance_exclude_list = calc_info.provenance_exclude_list or []

    for uuid, filename, target in local_copy_list:
        logger.debug(f'[submission of calculation {node.uuid}] copying local file/folder to {target}')

        def find_data_node(inputs, uuid):
            """Find and return the node with the given UUID from a nested mapping of input nodes.

            :param inputs: (nested) mapping of nodes
            :param uuid: UUID of the node to find
            :return: instance of `Node` or `None` if not found
            """
            from collections.abc import Mapping
            data_node = None

            for input_node in inputs.values():
                if isinstance(input_node, Mapping):
                    data_node = find_data_node(input_node, uuid)
                elif isinstance(input_node, Node) and input_node.uuid == uuid:
                    data_node = input_node
                if data_node is not None:
                    break

            return data_node

        try:
            data_node = load_node(uuid=uuid)
        except exceptions.NotExistent:
            data_node = find_data_node(inputs, uuid)

        if data_node is None:
            logger.warning(f'failed to load Node<{uuid}> specified in the `local_copy_list`')
        else:
            dirname = os.path.dirname(target)
            if dirname:
                os.makedirs(os.path.join(folder.abspath, dirname), exist_ok=True)
            with folder.open(target, 'wb') as handle:
                with data_node.open(filename, 'rb') as source:
                    shutil.copyfileobj(source, handle)
            provenance_exclude_list.append(target)

    # In a dry_run, the working directory is the raw input folder, which will already contain these resources
    if not dry_run:
        for filename in folder.get_content_list():
            logger.debug(f'[submission of calculation {node.pk}] copying file/folder {filename}...')
            transport.put(folder.get_abs_path(filename), filename)

        for (remote_computer_uuid, remote_abs_path, dest_rel_path) in remote_copy_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug('[submission of calculation {}] copying {} remotely, directly on the machine {}'.format(
                    node.pk, dest_rel_path, computer.label))
                try:
                    transport.copy(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning('[submission of calculation {}] Unable to copy remote resource from {} to {}! '
                                   'Stopping.'.format(node.pk, remote_abs_path, dest_rel_path))
                    raise
            else:
                raise NotImplementedError(
                    '[submission of calculation {}] Remote copy between two different machines is '
                    'not implemented yet'.format(node.pk))

        for (remote_computer_uuid, remote_abs_path, dest_rel_path) in remote_symlink_list:
            if remote_computer_uuid == computer.uuid:
                logger.debug('[submission of calculation {}] copying {} remotely, directly on the machine {}'.format(
                    node.pk, dest_rel_path, computer.label))
                try:
                    transport.symlink(remote_abs_path, dest_rel_path)
                except (IOError, OSError):
                    logger.warning('[submission of calculation {}] Unable to create remote symlink from {} to {}! '
                                   'Stopping.'.format(node.pk, remote_abs_path, dest_rel_path))
                    raise
            else:
                raise IOError(
                    f'It is not possible to create a symlink between two different machines for calculation {node.pk}')
    else:
        if remote_copy_list:
            with open(os.path.join(workdir, '_aiida_remote_copy_list.txt'), 'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_copy_list:
                    handle.write('would have copied {} to {} in working directory on remote {}'.format(
                        remote_abs_path, dest_rel_path, computer.label))

        if remote_symlink_list:
            with open(os.path.join(workdir, '_aiida_remote_symlink_list.txt'), 'w') as handle:
                for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_symlink_list:
                    handle.write('would have created symlinks from {} to {} in working directory on remote {}'.format(
                        remote_abs_path, dest_rel_path, computer.label))

    # Loop recursively over content of the sandbox folder copying all that are not in `provenance_exclude_list`. Note
    # that directories are not created explicitly. The `node.put_object_from_filelike` call will create intermediate
    # directories for nested files automatically when needed. This means though that empty folders in the sandbox or
    # folders that would be empty when considering the `provenance_exclude_list` will *not* be copied to the repo. The
    # advantage of this explicit copying instead of deleting the files from `provenance_exclude_list` from the sandbox
    # first before moving the entire remaining content to the node's repository, is that in this way we are guaranteed
    # not to accidentally move files to the repository that should not go there at all cost. Note that all entries in
    # the provenance exclude list are normalized first, just as the paths that are in the sandbox folder, otherwise the
    # direct equality test may fail, e.g.: './path/file.txt' != 'path/file.txt' even though they reference the same file
    provenance_exclude_list = [os.path.normpath(entry) for entry in provenance_exclude_list]

    for root, _, filenames in os.walk(folder.abspath):
        for filename in filenames:
            filepath = os.path.join(root, filename)
            relpath = os.path.normpath(os.path.relpath(filepath, folder.abspath))
            if relpath not in provenance_exclude_list:
                with open(filepath, 'rb') as handle:
                    node._repository.put_object_from_filelike(handle, relpath, 'wb', force=True)  # pylint: disable=protected-access

    if not dry_run:
        # Make sure that attaching the `remote_folder` with a link is the last thing we do. This gives the biggest
        # chance of making this method idempotent. That is to say, if a runner gets interrupted during this action, it
        # will simply retry the upload, unless we got here and managed to link it up, in which case we move to the next
        # task. Because in that case, the check for the existence of this link at the top of this function will exit
        # early from this command.
        remotedata = RemoteData(computer=computer, remote_path=workdir)
        remotedata.add_incoming(node, link_type=LinkType.CREATE, link_label='remote_folder')
        remotedata.store()