def test_cif_structure_roundtrip(self):
    from aiida.tools.dbexporters.tcod import export_cif, export_values
    from aiida.orm import Code
    from aiida.orm import JobCalculation
    from aiida.orm.data.cif import CifData
    from aiida.orm.data.parameter import ParameterData
    from aiida.orm.data.upf import UpfData
    from aiida.orm.data.folder import FolderData
    from aiida.common.folders import SandboxFolder
    from aiida.common.datastructures import calc_states
    from aiida.common.links import LinkType
    import tempfile

    with tempfile.NamedTemporaryFile() as f:
        f.write('''
            data_test
            _cell_length_a    10
            _cell_length_b    10
            _cell_length_c    10
            _cell_angle_alpha 90
            _cell_angle_beta  90
            _cell_angle_gamma 90
            loop_
            _atom_site_label
            _atom_site_fract_x
            _atom_site_fract_y
            _atom_site_fract_z
            C 0 0 0
            O 0.5 0.5 0.5
        ''')
        f.flush()
        a = CifData(file=f.name)

    c = a._get_aiida_structure()
    c.store()
    pd = ParameterData()

    code = Code(local_executable='test.sh')
    with tempfile.NamedTemporaryFile() as f:
        f.write("#!/bin/bash\n\necho test run\n")
        f.flush()
        code.add_path(f.name, 'test.sh')
    code.store()

    calc = JobCalculation(computer=self.computer)
    calc.set_resources({'num_machines': 1, 'num_mpiprocs_per_machine': 1})
    calc.add_link_from(code, "code")
    calc.set_environment_variables({'PATH': '/dev/null', 'USER': 'unknown'})

    with tempfile.NamedTemporaryFile(prefix="Fe") as f:
        f.write("<UPF version=\"2.0.1\">\nelement=\"Fe\"\n")
        f.flush()
        upf = UpfData(file=f.name)
        upf.store()
        calc.add_link_from(upf, "upf")

    with tempfile.NamedTemporaryFile() as f:
        f.write("data_test")
        f.flush()
        cif = CifData(file=f.name)
        cif.store()
        calc.add_link_from(cif, "cif")

    calc.store()
    calc._set_state(calc_states.SUBMITTING)
    with SandboxFolder() as f:
        calc._store_raw_input_folder(f.abspath)

    fd = FolderData()
    with open(fd._get_folder_pathsubfolder.get_abs_path(
            calc._SCHED_OUTPUT_FILE), 'w') as f:
        f.write("standard output")
        f.flush()

    with open(fd._get_folder_pathsubfolder.get_abs_path(
            calc._SCHED_ERROR_FILE), 'w') as f:
        f.write("standard error")
        f.flush()

    fd.store()
    fd.add_link_from(calc, calc._get_linkname_retrieved(), LinkType.CREATE)

    pd.add_link_from(calc, "calc", LinkType.CREATE)
    pd.store()

    with self.assertRaises(ValueError):
        export_cif(c, parameters=pd)

    c.add_link_from(calc, "calc", LinkType.CREATE)
    export_cif(c, parameters=pd)

    values = export_values(c, parameters=pd)
    values = values['0']

    self.assertEquals(values['_tcod_computation_environment'],
                      ['PATH=/dev/null\nUSER=unknown'])
    self.assertEquals(values['_tcod_computation_command'],
                      ['cd 1; ./_aiidasubmit.sh'])
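# A standalone sketch of how the expected '_tcod_computation_environment' value
# above could be formed: the calculation's environment variables rendered as
# KEY=value lines. This is an assumption inferred from the assertions in the
# test, not the exporter's actual implementation; the helper name is illustrative.
def _render_environment(env):
    # one KEY=value pair per line, sorted for a deterministic order
    return '\n'.join('{}={}'.format(k, v) for k, v in sorted(env.items()))

assert _render_environment({'PATH': '/dev/null', 'USER': 'unknown'}) == \
    'PATH=/dev/null\nUSER=unknown'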
def retrieve_calculation(calculation, transport, retrieved_temporary_folder):
    """
    Retrieve all the files of a completed job calculation using the given transport.

    If the job defined anything in the `retrieve_temporary_list`, those entries will be
    stored in the `retrieved_temporary_folder`. The caller is responsible for creating
    and destroying this folder.

    :param calculation: the instance of JobCalculation to update.
    :param transport: an already opened transport to use for the retrieval.
    :param retrieved_temporary_folder: the absolute path to a directory in which to store
        the files listed, if any, in the `retrieve_temporary_list` of the job's CalcInfo
    """
    logger_extra = get_dblogger_extra(calculation)

    execlogger.debug("Retrieving calc {}".format(calculation.pk), extra=logger_extra)
    workdir = calculation._get_remote_workdir()

    execlogger.debug("[retrieval of calc {}] chdir {}".format(calculation.pk, workdir),
                     extra=logger_extra)

    # Create the FolderData node to attach everything to
    retrieved_files = FolderData()
    retrieved_files.add_link_from(
        calculation, label=calculation._get_linkname_retrieved(),
        link_type=LinkType.CREATE)

    with transport:
        transport.chdir(workdir)

        # First, retrieve the files of folderdata
        retrieve_list = calculation._get_retrieve_list()
        retrieve_temporary_list = calculation._get_retrieve_temporary_list()
        retrieve_singlefile_list = calculation._get_retrieve_singlefile_list()

        with SandboxFolder() as folder:
            retrieve_files_from_list(calculation, transport, folder.abspath,
                                     retrieve_list)
            # Here I retrieved everything; now I store them inside the calculation
            retrieved_files.replace_with_folder(folder.abspath, overwrite=True)

        # Second, retrieve the singlefiles
        with SandboxFolder() as folder:
            _retrieve_singlefiles(calculation, transport, folder,
                                  retrieve_singlefile_list, logger_extra)

        # Retrieve the temporary files in the retrieved_temporary_folder if any files
        # were specified in the 'retrieve_temporary_list' key
        if retrieve_temporary_list:
            retrieve_files_from_list(calculation, transport,
                                     retrieved_temporary_folder,
                                     retrieve_temporary_list)

            # Log the files that were retrieved in the temporary folder
            for filename in os.listdir(retrieved_temporary_folder):
                execlogger.debug(
                    "[retrieval of calc {}] Retrieved temporary file or folder '{}'"
                    .format(calculation.pk, filename), extra=logger_extra)

    # Store everything
    execlogger.debug(
        "[retrieval of calc {}] Storing retrieved_files={}".format(
            calculation.pk, retrieved_files.dbnode.pk),
        extra=logger_extra)
    retrieved_files.store()
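# A minimal caller sketch for retrieve_calculation, following the docstring's
# contract that the caller creates and destroys retrieved_temporary_folder.
# The wrapper name is illustrative; `calculation` and `transport` are assumed
# to be an already-loaded JobCalculation and an (unopened) transport.
import shutil
import tempfile

def retrieve_with_temporary_folder(calculation, transport):
    retrieved_temporary_folder = tempfile.mkdtemp()
    try:
        retrieve_calculation(calculation, transport, retrieved_temporary_folder)
    finally:
        # the temporary folder only needs to live for the duration of the call
        shutil.rmtree(retrieved_temporary_folder, ignore_errors=True)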
def retrieve_computed_for_authinfo(authinfo):
    from aiida.orm import JobCalculation
    from aiida.common.folders import SandboxFolder
    from aiida.orm.data.folder import FolderData
    from aiida.utils.logger import get_dblogger_extra
    from aiida.orm import DataFactory

    import os

    if not authinfo.enabled:
        return

    calcs_to_retrieve = list(JobCalculation._get_all_with_state(
        state=calc_states.COMPUTED,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser))
    retrieved = []

    # Avoid opening an ssh connection if there are no calculations to retrieve
    if len(calcs_to_retrieve):
        # Open connection
        with authinfo.get_transport() as t:
            for calc in calcs_to_retrieve:
                logger_extra = get_dblogger_extra(calc)
                t._set_logger_extra(logger_extra)

                try:
                    calc._set_state(calc_states.RETRIEVING)
                except ModificationNotAllowed:
                    # Someone else has already started to retrieve it,
                    # just log and continue
                    execlogger.debug("Attempting to retrieve more than once "
                                     "calculation {}: skipping!".format(calc.pk),
                                     extra=logger_extra)
                    continue  # with the next calculation to retrieve
                try:
                    execlogger.debug("Retrieving calc {}".format(calc.pk),
                                     extra=logger_extra)
                    workdir = calc._get_remote_workdir()
                    retrieve_list = calc._get_retrieve_list()
                    retrieve_singlefile_list = calc._get_retrieve_singlefile_list()
                    execlogger.debug("[retrieval of calc {}] "
                                     "chdir {}".format(calc.pk, workdir),
                                     extra=logger_extra)
                    t.chdir(workdir)

                    retrieved_files = FolderData()
                    retrieved_files.add_link_from(
                        calc, label=calc._get_linkname_retrieved(),
                        link_type=LinkType.CREATE)

                    # First, retrieve the files of folderdata
                    with SandboxFolder() as folder:
                        for item in retrieve_list:
                            # Two possibilities: `item` is a plain string or a
                            # [remote, local, depth] list; in both cases the
                            # remote part may or may not contain glob patterns.
                            # First decide the names of the files
                            if isinstance(item, list):
                                tmp_rname, tmp_lname, depth = item
                                # if the pattern can match more than one file,
                                # compute one local name per match
                                if t.has_magic(tmp_rname):
                                    remote_names = t.glob(tmp_rname)
                                    local_names = []
                                    for rem in remote_names:
                                        to_append = rem.split(
                                            os.path.sep)[-depth:] if depth > 0 else []
                                        local_names.append(os.path.sep.join(
                                            [tmp_lname] + to_append))
                                else:
                                    remote_names = [tmp_rname]
                                    to_append = tmp_rname.split(
                                        os.path.sep)[-depth:] if depth > 0 else []
                                    local_names = [os.path.sep.join(
                                        [tmp_lname] + to_append)]
                                if depth > 1:
                                    # create directories in the folder, if needed
                                    for this_local_file in local_names:
                                        new_folder = os.path.join(
                                            folder.abspath,
                                            os.path.split(this_local_file)[0])
                                        if not os.path.exists(new_folder):
                                            os.makedirs(new_folder)
                            else:  # it is a string
                                if t.has_magic(item):
                                    remote_names = t.glob(item)
                                    local_names = [os.path.split(rem)[1]
                                                   for rem in remote_names]
                                else:
                                    remote_names = [item]
                                    local_names = [os.path.split(item)[1]]

                            for rem, loc in zip(remote_names, local_names):
                                execlogger.debug(
                                    "[retrieval of calc {}] "
                                    "Trying to retrieve remote item '{}'".format(
                                        calc.pk, rem),
                                    extra=logger_extra)
                                t.get(rem, os.path.join(folder.abspath, loc),
                                      ignore_nonexisting=True)

                        # Here I retrieved everything;
                        # now I store them inside the calculation
                        retrieved_files.replace_with_folder(folder.abspath,
                                                            overwrite=True)

                    # Second, retrieve the singlefiles
                    with SandboxFolder() as folder:
                        singlefile_list = []
                        for (linkname, subclassname,
                             filename) in retrieve_singlefile_list:
                            execlogger.debug(
                                "[retrieval of calc {}] Trying "
                                "to retrieve remote singlefile '{}'".format(
                                    calc.pk, filename),
                                extra=logger_extra)
                            localfilename = os.path.join(
                                folder.abspath, os.path.split(filename)[1])
                            t.get(filename, localfilename,
                                  ignore_nonexisting=True)
                            singlefile_list.append((linkname, subclassname,
                                                    localfilename))

                        # ignore files that have not been retrieved
                        singlefile_list = [i for i in singlefile_list
                                           if os.path.exists(i[2])]

                        # after retrieving from the cluster, I create the objects
                        singlefiles = []
                        for (linkname, subclassname, filename) in singlefile_list:
                            SinglefileSubclass = DataFactory(subclassname)
                            singlefile = SinglefileSubclass()
                            singlefile.set_file(filename)
                            singlefile.add_link_from(calc, label=linkname,
                                                     link_type=LinkType.CREATE)
                            singlefiles.append(singlefile)

                    # Finally, store
                    execlogger.debug("[retrieval of calc {}] "
                                     "Storing retrieved_files={}".format(
                                         calc.pk, retrieved_files.dbnode.pk),
                                     extra=logger_extra)
                    retrieved_files.store()
                    for fil in singlefiles:
                        execlogger.debug("[retrieval of calc {}] "
                                         "Storing retrieved_singlefile={}".format(
                                             calc.pk, fil.dbnode.pk),
                                         extra=logger_extra)
                        fil.store()

                    # If I was the one retrieving, I should also be the only
                    # one parsing! I do not check
                    calc._set_state(calc_states.PARSING)

                    Parser = calc.get_parserclass()
                    # If no parser is set, the calculation is successful
                    successful = True
                    if Parser is not None:
                        # TODO: parse here
                        parser = Parser(calc)
                        successful, new_nodes_tuple = parser.parse_from_calc()

                        for label, n in new_nodes_tuple:
                            n.add_link_from(calc, label=label,
                                            link_type=LinkType.CREATE)
                            n.store()

                    if successful:
                        try:
                            calc._set_state(calc_states.FINISHED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid useless error messages, I
                            # just ignore
                            pass
                    else:
                        try:
                            calc._set_state(calc_states.FAILED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid useless error messages, I
                            # just ignore
                            pass
                        execlogger.error(
                            "[parsing of calc {}] "
                            "The parser returned an error, but it should have "
                            "created an output node with some partial results "
                            "and warnings. Check there for more information on "
                            "the problem".format(calc.pk), extra=logger_extra)
                    retrieved.append(calc)
                except Exception:
                    import traceback

                    tb = traceback.format_exc()
                    newextradict = logger_extra.copy()
                    newextradict['full_traceback'] = tb
                    if calc.get_state() == calc_states.PARSING:
                        execlogger.error("Error parsing calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        # TODO: add a 'comment' to the calculation
                        try:
                            calc._set_state(calc_states.PARSINGFAILED)
                        except ModificationNotAllowed:
                            pass
                    else:
                        execlogger.error("Error retrieving calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        try:
                            calc._set_state(calc_states.RETRIEVALFAILED)
                        except ModificationNotAllowed:
                            pass
                        raise

    return retrieved
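# A standalone sketch of the depth-based renaming applied above to retrieve_list
# entries of the form [remote_pattern, local_name, depth]: the last `depth` path
# components of the matched remote name are appended under the local name. The
# helper name is illustrative, and the asserts assume a POSIX os.path.sep.
import os

def _local_name_for(remote_path, local_base, depth):
    # keep the trailing `depth` components of the remote path (none if depth == 0)
    to_append = remote_path.split(os.path.sep)[-depth:] if depth > 0 else []
    return os.path.sep.join([local_base] + to_append)

assert _local_name_for('out/sub/aiida.out', 'logs', 1) == 'logs/aiida.out'
assert _local_name_for('out/sub/aiida.out', 'logs', 2) == 'logs/sub/aiida.out'
assert _local_name_for('out/sub/aiida.out', 'logs', 0) == 'logs'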
def test_complex_graph_import_export(self):
    """
    This test checks that a small but relatively complex graph can be
    correctly exported and imported. It creates the graph, stores it in the
    database, exports it to a file and imports it again. Finally, it checks
    that the initial nodes are present in the imported graph.
    """
    import tempfile
    import shutil
    import os

    from aiida.orm.calculation.job import JobCalculation
    from aiida.orm.data.folder import FolderData
    from aiida.orm.data.parameter import ParameterData
    from aiida.orm.data.remote import RemoteData
    from aiida.common.links import LinkType
    from aiida.orm.importexport import export, import_data
    from aiida.orm.utils import load_node
    from aiida.common.exceptions import NotExistent

    temp_folder = tempfile.mkdtemp()
    try:
        calc1 = JobCalculation()
        calc1.set_computer(self.computer)
        calc1.set_resources({"num_machines": 1,
                             "num_mpiprocs_per_machine": 1})
        calc1.label = "calc1"
        calc1.store()
        calc1._set_state(u'RETRIEVING')

        pd1 = ParameterData()
        pd1.label = "pd1"
        pd1.store()

        pd2 = ParameterData()
        pd2.label = "pd2"
        pd2.store()

        rd1 = RemoteData()
        rd1.label = "rd1"
        rd1.set_remote_path("/x/y.py")
        rd1.set_computer(self.computer)
        rd1.store()
        rd1.add_link_from(calc1, link_type=LinkType.CREATE)

        calc2 = JobCalculation()
        calc2.set_computer(self.computer)
        calc2.set_resources({"num_machines": 1,
                             "num_mpiprocs_per_machine": 1})
        calc2.label = "calc2"
        calc2.store()
        calc2.add_link_from(pd1, link_type=LinkType.INPUT)
        calc2.add_link_from(pd2, link_type=LinkType.INPUT)
        calc2.add_link_from(rd1, link_type=LinkType.INPUT)
        calc2._set_state(u'SUBMITTING')

        fd1 = FolderData()
        fd1.label = "fd1"
        fd1.store()
        fd1.add_link_from(calc2, link_type=LinkType.CREATE)

        node_uuids_labels = {calc1.uuid: calc1.label, pd1.uuid: pd1.label,
                             pd2.uuid: pd2.label, rd1.uuid: rd1.label,
                             calc2.uuid: calc2.label, fd1.uuid: fd1.label}

        filename = os.path.join(temp_folder, "export.tar.gz")
        export([fd1.dbnode], outfile=filename, silent=True)

        self.clean_db()

        import_data(filename, silent=True, ignore_unknown_nodes=True)

        for uuid, label in node_uuids_labels.iteritems():
            try:
                load_node(uuid)
            except NotExistent:
                self.fail("Node with UUID {} and label {} was not "
                          "found.".format(uuid, label))
    finally:
        # Delete the created temporary folder
        shutil.rmtree(temp_folder, ignore_errors=True)
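# A minimal export/import round-trip sketch using the same functions exercised
# by the test above. `node` (any stored node) and the wrapper name are
# illustrative assumptions; the test relies on export() also pulling the
# linked ancestors of the exported node into the archive.
import os
import shutil
import tempfile

def roundtrip_export_import(node):
    from aiida.orm.importexport import export, import_data

    temp_folder = tempfile.mkdtemp()
    try:
        archive = os.path.join(temp_folder, "export.tar.gz")
        export([node.dbnode], outfile=archive, silent=True)
        import_data(archive, silent=True)
    finally:
        shutil.rmtree(temp_folder, ignore_errors=True)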
def retrieve_computed_for_authinfo(authinfo):
    from aiida.orm import JobCalculation
    from aiida.common.folders import SandboxFolder
    from aiida.orm.data.folder import FolderData
    from aiida.common.log import get_dblogger_extra
    from aiida.orm import DataFactory
    from aiida.backends.utils import QueryFactory

    import os

    if not authinfo.enabled:
        return

    qmanager = QueryFactory()()
    # Query the COMPUTED calculations for this (computer, aiidauser) pair
    calcs_to_retrieve = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.COMPUTED,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser)
    retrieved = []

    # Avoid opening an ssh connection if there are no calculations to retrieve
    if len(calcs_to_retrieve):
        # Open connection
        with authinfo.get_transport() as t:
            for calc in calcs_to_retrieve:
                logger_extra = get_dblogger_extra(calc)
                t._set_logger_extra(logger_extra)

                try:
                    calc._set_state(calc_states.RETRIEVING)
                except ModificationNotAllowed:
                    # Someone else has already started to retrieve it,
                    # just log and continue
                    execlogger.debug("Attempting to retrieve more than once "
                                     "calculation {}: skipping!".format(calc.pk),
                                     extra=logger_extra)
                    continue  # with the next calculation to retrieve
                try:
                    execlogger.debug("Retrieving calc {}".format(calc.pk),
                                     extra=logger_extra)
                    workdir = calc._get_remote_workdir()
                    retrieve_list = calc._get_retrieve_list()
                    retrieve_temporary_list = calc._get_retrieve_temporary_list()
                    retrieve_singlefile_list = calc._get_retrieve_singlefile_list()
                    execlogger.debug("[retrieval of calc {}] "
                                     "chdir {}".format(calc.pk, workdir),
                                     extra=logger_extra)
                    t.chdir(workdir)

                    retrieved_files = FolderData()
                    retrieved_files.add_link_from(
                        calc, label=calc._get_linkname_retrieved(),
                        link_type=LinkType.CREATE)

                    # First, retrieve the files of folderdata
                    with SandboxFolder() as folder:
                        retrieve_files_from_list(calc, t, folder, retrieve_list)
                        # Here I retrieved everything;
                        # now I store them inside the calculation
                        retrieved_files.replace_with_folder(folder.abspath,
                                                            overwrite=True)

                    # Second, retrieve the singlefiles
                    with SandboxFolder() as folder:
                        singlefile_list = []
                        for (linkname, subclassname,
                             filename) in retrieve_singlefile_list:
                            execlogger.debug(
                                "[retrieval of calc {}] Trying "
                                "to retrieve remote singlefile '{}'".format(
                                    calc.pk, filename),
                                extra=logger_extra)
                            localfilename = os.path.join(
                                folder.abspath, os.path.split(filename)[1])
                            t.get(filename, localfilename,
                                  ignore_nonexisting=True)
                            singlefile_list.append((linkname, subclassname,
                                                    localfilename))

                        # ignore files that have not been retrieved
                        singlefile_list = [i for i in singlefile_list
                                           if os.path.exists(i[2])]

                        # after retrieving from the cluster, I create the objects
                        singlefiles = []
                        for (linkname, subclassname, filename) in singlefile_list:
                            SinglefileSubclass = DataFactory(subclassname)
                            singlefile = SinglefileSubclass()
                            singlefile.set_file(filename)
                            singlefile.add_link_from(calc, label=linkname,
                                                     link_type=LinkType.CREATE)
                            singlefiles.append(singlefile)

                    # Retrieve the temporary files in a separate temporary folder
                    # if any files were specified in the 'retrieve_temporary_list' key
                    if retrieve_temporary_list:
                        retrieved_temporary_folder = FolderData()
                        with SandboxFolder() as folder:
                            retrieve_files_from_list(calc, t, folder,
                                                     retrieve_temporary_list)
                            retrieved_temporary_folder.replace_with_folder(
                                folder.abspath, overwrite=True)

                        # Log the files that were retrieved in the temporary folder
                        for entry in retrieved_temporary_folder.get_folder_list():
                            execlogger.debug(
                                "[retrieval of calc {}] Retrieved temporary file "
                                "or folder '{}'".format(calc.pk, entry),
                                extra=logger_extra)
                    else:
                        retrieved_temporary_folder = None

                    # Finally, store the retrieved_files node. The
                    # retrieved_temporary_folder node is explicitly not stored, but
                    # will just be passed to the parser.parse_from_calc call
                    execlogger.debug(
                        "[retrieval of calc {}] Storing retrieved_files={}".format(
                            calc.pk, retrieved_files.dbnode.pk),
                        extra=logger_extra)
                    retrieved_files.store()

                    for fil in singlefiles:
                        execlogger.debug(
                            "[retrieval of calc {}] Storing "
                            "retrieved_singlefile={}".format(calc.pk, fil.dbnode.pk),
                            extra=logger_extra)
                        fil.store()

                    # If I was the one retrieving, I should also be the only one
                    # parsing! I do not check
                    calc._set_state(calc_states.PARSING)

                    Parser = calc.get_parserclass()
                    # If no parser is set, the calculation is successful
                    successful = True
                    if Parser is not None:
                        parser = Parser(calc)
                        successful, new_nodes_tuple = parser.parse_from_calc(
                            retrieved_temporary_folder)

                        for label, n in new_nodes_tuple:
                            n.add_link_from(calc, label=label,
                                            link_type=LinkType.CREATE)
                            n.store()

                    if successful:
                        try:
                            calc._set_state(calc_states.FINISHED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid useless error messages, I
                            # just ignore
                            pass
                    else:
                        try:
                            calc._set_state(calc_states.FAILED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid useless error messages, I
                            # just ignore
                            pass
                        execlogger.error(
                            "[parsing of calc {}] "
                            "The parser returned an error, but it should have "
                            "created an output node with some partial results "
                            "and warnings. Check there for more information on "
                            "the problem".format(calc.pk), extra=logger_extra)
                    retrieved.append(calc)
                except Exception:
                    import traceback

                    tb = traceback.format_exc()
                    newextradict = logger_extra.copy()
                    newextradict['full_traceback'] = tb
                    if calc.get_state() == calc_states.PARSING:
                        execlogger.error("Error parsing calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        # TODO: add a 'comment' to the calculation
                        try:
                            calc._set_state(calc_states.PARSINGFAILED)
                        except ModificationNotAllowed:
                            pass
                    else:
                        execlogger.error("Error retrieving calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        try:
                            calc._set_state(calc_states.RETRIEVALFAILED)
                        except ModificationNotAllowed:
                            pass
                        raise

    return retrieved
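# A minimal parser-contract sketch matching the parse_from_calc call above: the
# parser receives the unstored temporary FolderData (or None) and returns a
# success flag plus (label, node) pairs, which the caller then links and stores.
# The class and the link label are illustrative, not an AiiDA-provided parser.
class MinimalParser(object):
    def __init__(self, calc):
        self._calc = calc

    def parse_from_calc(self, retrieved_temporary_folder=None):
        from aiida.orm.data.parameter import ParameterData

        output = ParameterData(dict={'parsed': True})
        # the caller adds the CREATE link and stores each returned node
        return True, [('output_parameters', output)]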