def read_test(self, outfolder):
    """Import a test export archive and return the data needed to run its checks.

    The folder ``outfolder`` is imported into the database; the first imported
    node that is a ``JobCalculation`` is taken as the calculation under test.
    The expected results are read from the ``_aiida_checks.json`` file inside
    ``outfolder``.

    :param outfolder: path to the test folder (an AiiDA export archive plus
        a ``_aiida_checks.json`` check file).
    :return: tuple ``(calc, tests, retrieved_temporary_folder)`` where ``calc``
        is the imported JobCalculation, ``tests`` the decoded check dict, and
        ``retrieved_temporary_folder`` a FolderData (or None if the calculation
        defined no ``retrieve_temporary_list``).
    :raises ValueError: if no JobCalculation is found in the import, if the
        check file is missing, or if it is not valid JSON.
    :raises SkipTestException: if the test module defines a ``skip_condition``
        that evaluates to True.
    """
    import os
    import importlib
    import json
    from aiida.orm import JobCalculation
    from aiida.orm.utils import load_node
    from aiida.orm.importexport import import_data

    imported = import_data(outfolder, ignore_unknown_nodes=True, silent=True)

    # Pick the first imported node that is a JobCalculation (or subclass).
    calc = None
    for _, pk in imported['Node']['new']:
        c = load_node(pk)
        if issubclass(c.__class__, JobCalculation):
            calc = c
            break

    # Fail loudly if the archive did not contain a calculation: proceeding
    # would only raise an obscure AttributeError on 'NoneType' below.
    if calc is None:
        raise ValueError(
            "No JobCalculation node found in the imported test data!")

    retrieved = calc.out.retrieved

    retrieve_temporary_list = calc.get_attr('retrieve_temporary_list', None)
    if retrieve_temporary_list:
        # Fake the temporary folder by copying the permanently retrieved one;
        # the parser only needs *a* folder with the files in it.
        from aiida.orm.data.folder import FolderData
        retrieved_temporary_folder = FolderData()
        retrieved_temporary_folder.replace_with_folder(
            retrieved.get_abs_path('.'), overwrite=True)
    else:
        retrieved_temporary_folder = None

    try:
        with open(os.path.join(outfolder, '_aiida_checks.json')) as f:
            tests = json.load(f)
    except IOError:
        raise ValueError("This test does not provide a check file!")
    except ValueError:
        raise ValueError(
            "This test does provide a check file, but it cannot "
            "be JSON-decoded!")

    mod_path = 'aiida.backends.tests.parser_tests.{}'.format(
        os.path.split(outfolder)[1])

    skip_test = False
    try:
        m = importlib.import_module(mod_path)
        skip_test = m.skip_condition()
    except Exception:
        # Deliberate best-effort: the companion module (or its
        # skip_condition) is optional, so any failure means "do not skip".
        pass

    if skip_test:
        raise SkipTestException

    return calc, tests, retrieved_temporary_folder
def get_retrieved(self, parameters):
    """Build a fake 'retrieved' dict backed by a temporary output folder.

    Copies the reference test files into a scratch directory under the
    output names taken from ``parameters``, wraps that directory in a
    FolderData node, and returns it under the 'retrieved' key.
    """
    import tempfile
    import shutil
    from aiida.orm.data.folder import FolderData

    # Stage the reference files under the names the calculation expects.
    scratch = tempfile.mkdtemp()
    reference_files = ['HKUST-1.cssr', 'HKUST-1.sa', 'HKUST-1.volpo']
    for src_name, dst_name in list(zip(reference_files, parameters.output_files)):
        shutil.copyfile(
            os.path.join(zt.TEST_DIR, src_name),
            os.path.join(scratch, dst_name))

    # Snapshot the staged directory into a FolderData node, then
    # discard the scratch directory.
    folder_node = FolderData()
    folder_node.replace_with_folder(scratch)
    shutil.rmtree(scratch)

    return {'retrieved': folder_node}
def retrieve_calculation(calculation, transport, retrieved_temporary_folder):
    """
    Retrieve all the files of a completed job calculation using the given transport.

    If the job defined anything in the `retrieve_temporary_list`, those entries will be stored in the
    `retrieved_temporary_folder`. The caller is responsible for creating and destroying this folder.

    :param calculation: the instance of JobCalculation to update.
    :param transport: an already opened transport to use for the retrieval.
    :param retrieved_temporary_folder: the absolute path to a directory in which to store the files
        listed, if any, in the `retrieved_temporary_folder` of the jobs CalcInfo
    """
    logger_extra = get_dblogger_extra(calculation)
    execlogger.debug("Retrieving calc {}".format(calculation.pk),
                     extra=logger_extra)
    workdir = calculation._get_remote_workdir()
    execlogger.debug("[retrieval of calc {}] chdir {}".format(
        calculation.pk, workdir), extra=logger_extra)

    # Create the FolderData node to attach everything to.
    # NOTE(review): the node is linked before anything is copied into it;
    # it is only stored at the very end, after the transfer succeeded.
    retrieved_files = FolderData()
    retrieved_files.add_link_from(
        calculation, label=calculation._get_linkname_retrieved(),
        link_type=LinkType.CREATE)

    with transport:
        transport.chdir(workdir)

        # First, retrieve the files of folderdata
        retrieve_list = calculation._get_retrieve_list()
        retrieve_temporary_list = calculation._get_retrieve_temporary_list()
        retrieve_singlefile_list = calculation._get_retrieve_singlefile_list()

        # Download into a sandbox first so a partial transfer never ends up
        # attached to the node; the sandbox is swapped in wholesale below.
        with SandboxFolder() as folder:
            retrieve_files_from_list(calculation, transport, folder.abspath,
                                     retrieve_list)
            # Here I retrieved everything; now I store them inside the calculation
            retrieved_files.replace_with_folder(folder.abspath, overwrite=True)

        # Second, retrieve the singlefiles
        with SandboxFolder() as folder:
            _retrieve_singlefiles(calculation, transport, folder,
                                  retrieve_singlefile_list, logger_extra)

        # Retrieve the temporary files in the retrieved_temporary_folder if any files were
        # specified in the 'retrieve_temporary_list' key
        if retrieve_temporary_list:
            retrieve_files_from_list(calculation, transport,
                                     retrieved_temporary_folder,
                                     retrieve_temporary_list)

            # Log the files that were retrieved in the temporary folder
            for filename in os.listdir(retrieved_temporary_folder):
                execlogger.debug(
                    "[retrieval of calc {}] Retrieved temporary file or folder '{}'"
                    .format(calculation.pk, filename), extra=logger_extra)

        # Store everything
        execlogger.debug("[retrieval of calc {}] "
                         "Storing retrieved_files={}".format(
                             calculation.pk, retrieved_files.dbnode.pk),
                         extra=logger_extra)
        retrieved_files.store()
def retrieve_computed_for_authinfo(authinfo):
    """Retrieve and parse all COMPUTED calculations for the given authinfo.

    Opens a single transport connection and, for each calculation in the
    COMPUTED state for this (computer, user) pair: marks it RETRIEVING,
    downloads the files in its retrieve_list and retrieve_singlefile_list,
    stores them as output nodes, runs the parser (if any), and sets the
    final state (FINISHED/FAILED, or PARSINGFAILED/RETRIEVALFAILED on error).

    :param authinfo: the authorization info (computer + user) to process.
    :return: the list of successfully processed calculations, or None if
        the authinfo is disabled.
    """
    from aiida.orm import JobCalculation
    from aiida.common.folders import SandboxFolder
    from aiida.orm.data.folder import FolderData
    from aiida.utils.logger import get_dblogger_extra
    from aiida.orm import DataFactory
    import os

    if not authinfo.enabled:
        return

    calcs_to_retrieve = list(
        JobCalculation._get_all_with_state(state=calc_states.COMPUTED,
                                           computer=authinfo.dbcomputer,
                                           user=authinfo.aiidauser))
    retrieved = []

    # I avoid to open an ssh connection if there are no
    # calcs with state not COMPUTED
    if len(calcs_to_retrieve):
        # Open connection
        with authinfo.get_transport() as t:
            for calc in calcs_to_retrieve:
                logger_extra = get_dblogger_extra(calc)
                t._set_logger_extra(logger_extra)

                try:
                    calc._set_state(calc_states.RETRIEVING)
                except ModificationNotAllowed:
                    # Someone else has already started to retrieve it,
                    # just log and continue
                    execlogger.debug("Attempting to retrieve more than once "
                                     "calculation {}: skipping!".format(calc.pk),
                                     extra=logger_extra)
                    continue  # with the next calculation to retrieve
                try:
                    execlogger.debug("Retrieving calc {}".format(calc.pk),
                                     extra=logger_extra)
                    workdir = calc._get_remote_workdir()
                    retrieve_list = calc._get_retrieve_list()
                    retrieve_singlefile_list = calc._get_retrieve_singlefile_list()
                    execlogger.debug("[retrieval of calc {}] "
                                     "chdir {}".format(calc.pk, workdir),
                                     extra=logger_extra)
                    t.chdir(workdir)

                    retrieved_files = FolderData()
                    retrieved_files.add_link_from(
                        calc, label=calc._get_linkname_retrieved(),
                        link_type=LinkType.CREATE)

                    # First, retrieve the files of folderdata
                    with SandboxFolder() as folder:
                        for item in retrieve_list:
                            # Each item is either a plain remote path (string)
                            # or a [remote_pattern, local_name, depth] triple.
                            if isinstance(item, list):
                                tmp_rname, tmp_lname, depth = item
                                # if there are more than one file I do something differently
                                if t.has_magic(tmp_rname):
                                    remote_names = t.glob(tmp_rname)
                                    local_names = []
                                    for rem in remote_names:
                                        to_append = rem.split(
                                            os.path.sep
                                        )[-depth:] if depth > 0 else []
                                        local_names.append(
                                            os.path.sep.join([tmp_lname] + to_append))
                                else:
                                    remote_names = [tmp_rname]
                                    # BUGFIX: split the pattern string itself;
                                    # the original split the one-element *list*
                                    # (remote_names), which has no .split() and
                                    # raised AttributeError for depth > 0.
                                    to_append = tmp_rname.split(
                                        os.path.sep
                                    )[-depth:] if depth > 0 else []
                                    local_names = [
                                        os.path.sep.join([tmp_lname] + to_append)
                                    ]
                                if depth > 1:
                                    # create directories in the folder, if needed
                                    for this_local_file in local_names:
                                        new_folder = os.path.join(
                                            folder.abspath,
                                            os.path.split(this_local_file)[0])
                                        if not os.path.exists(new_folder):
                                            os.makedirs(new_folder)
                            else:  # it is a string
                                if t.has_magic(item):
                                    remote_names = t.glob(item)
                                    local_names = [
                                        os.path.split(rem)[1]
                                        for rem in remote_names
                                    ]
                                else:
                                    remote_names = [item]
                                    local_names = [os.path.split(item)[1]]

                            for rem, loc in zip(remote_names, local_names):
                                execlogger.debug(
                                    "[retrieval of calc {}] "
                                    "Trying to retrieve remote item '{}'".format(
                                        calc.pk, rem),
                                    extra=logger_extra)
                                t.get(rem, os.path.join(folder.abspath, loc),
                                      ignore_nonexisting=True)

                        # Here I retrieved everything;
                        # now I store them inside the calculation
                        retrieved_files.replace_with_folder(folder.abspath,
                                                            overwrite=True)

                    # Second, retrieve the singlefiles
                    with SandboxFolder() as folder:
                        singlefile_list = []
                        for (linkname, subclassname,
                             filename) in retrieve_singlefile_list:
                            execlogger.debug(
                                "[retrieval of calc {}] Trying "
                                "to retrieve remote singlefile '{}'".format(
                                    calc.pk, filename),
                                extra=logger_extra)
                            localfilename = os.path.join(
                                folder.abspath, os.path.split(filename)[1])
                            t.get(filename, localfilename,
                                  ignore_nonexisting=True)
                            singlefile_list.append(
                                (linkname, subclassname, localfilename))

                        # ignore files that have not been retrieved
                        singlefile_list = [
                            i for i in singlefile_list if os.path.exists(i[2])
                        ]

                        # after retrieving from the cluster, I create the objects
                        singlefiles = []
                        for (linkname, subclassname,
                             filename) in singlefile_list:
                            SinglefileSubclass = DataFactory(subclassname)
                            singlefile = SinglefileSubclass()
                            singlefile.set_file(filename)
                            singlefile.add_link_from(calc, label=linkname,
                                                     link_type=LinkType.CREATE)
                            singlefiles.append(singlefile)

                    # Finally, store
                    execlogger.debug("[retrieval of calc {}] "
                                     "Storing retrieved_files={}".format(
                                         calc.pk, retrieved_files.dbnode.pk),
                                     extra=logger_extra)
                    retrieved_files.store()
                    for fil in singlefiles:
                        execlogger.debug(
                            "[retrieval of calc {}] "
                            "Storing retrieved_singlefile={}".format(
                                calc.pk, fil.dbnode.pk),
                            extra=logger_extra)
                        fil.store()

                    # If I was the one retrieving, I should also be the only
                    # one parsing! I do not check
                    calc._set_state(calc_states.PARSING)

                    Parser = calc.get_parserclass()
                    # If no parser is set, the calculation is successful
                    successful = True
                    if Parser is not None:
                        parser = Parser(calc)
                        successful, new_nodes_tuple = parser.parse_from_calc()
                        for label, n in new_nodes_tuple:
                            n.add_link_from(calc, label=label,
                                            link_type=LinkType.CREATE)
                            n.store()

                    if successful:
                        try:
                            calc._set_state(calc_states.FINISHED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                    else:
                        try:
                            calc._set_state(calc_states.FAILED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                        execlogger.error(
                            "[parsing of calc {}] "
                            "The parser returned an error, but it should have "
                            "created an output node with some partial results "
                            "and warnings. Check there for more information on "
                            "the problem".format(calc.pk), extra=logger_extra)
                    retrieved.append(calc)
                except Exception:
                    # Record the failure with a full traceback, set the
                    # appropriate failure state, and re-raise.
                    import traceback
                    tb = traceback.format_exc()
                    newextradict = logger_extra.copy()
                    newextradict['full_traceback'] = tb
                    if calc.get_state() == calc_states.PARSING:
                        execlogger.error("Error parsing calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        # TODO: add a 'comment' to the calculation
                        try:
                            calc._set_state(calc_states.PARSINGFAILED)
                        except ModificationNotAllowed:
                            pass
                    else:
                        execlogger.error("Error retrieving calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        try:
                            calc._set_state(calc_states.RETRIEVALFAILED)
                        except ModificationNotAllowed:
                            pass
                    raise

    return retrieved
def retrieve_computed_for_authinfo(authinfo):
    """Retrieve and parse all COMPUTED calculations for the given authinfo.

    Opens one transport connection and, for each COMPUTED calculation of this
    (computer, user) pair: marks it RETRIEVING, downloads the retrieve_list,
    retrieve_singlefile_list and (unstored) retrieve_temporary_list files,
    stores the permanent outputs, runs the parser, and sets the final state
    (FINISHED/FAILED, or PARSINGFAILED/RETRIEVALFAILED on error).

    :param authinfo: the authorization info (computer + user) to process.
    :return: the list of processed calculations, or None if the authinfo
        is disabled.
    """
    from aiida.orm import JobCalculation
    from aiida.common.folders import SandboxFolder
    from aiida.orm.data.folder import FolderData
    from aiida.common.log import get_dblogger_extra
    from aiida.orm import DataFactory
    from aiida.backends.utils import QueryFactory
    import os

    if not authinfo.enabled:
        return

    qmanager = QueryFactory()()
    # I create a unique set of pairs (computer, aiidauser)
    calcs_to_retrieve = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.COMPUTED,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser)
    retrieved = []

    # I avoid to open an ssh connection if there are no
    # calcs with state not COMPUTED
    if len(calcs_to_retrieve):
        # Open connection
        with authinfo.get_transport() as t:
            for calc in calcs_to_retrieve:
                logger_extra = get_dblogger_extra(calc)
                t._set_logger_extra(logger_extra)
                try:
                    calc._set_state(calc_states.RETRIEVING)
                except ModificationNotAllowed:
                    # Someone else has already started to retrieve it,
                    # just log and continue
                    execlogger.debug("Attempting to retrieve more than once "
                                     "calculation {}: skipping!".format(calc.pk),
                                     extra=logger_extra)
                    continue  # with the next calculation to retrieve
                try:
                    execlogger.debug("Retrieving calc {}".format(calc.pk),
                                     extra=logger_extra)
                    workdir = calc._get_remote_workdir()
                    retrieve_list = calc._get_retrieve_list()
                    retrieve_temporary_list = calc._get_retrieve_temporary_list()
                    retrieve_singlefile_list = calc._get_retrieve_singlefile_list()
                    execlogger.debug("[retrieval of calc {}] "
                                     "chdir {}".format(calc.pk, workdir),
                                     extra=logger_extra)
                    t.chdir(workdir)

                    # The FolderData node is linked now but only stored at the
                    # end, once the whole transfer has succeeded.
                    retrieved_files = FolderData()
                    retrieved_files.add_link_from(
                        calc, label=calc._get_linkname_retrieved(),
                        link_type=LinkType.CREATE)

                    # First, retrieve the files of folderdata
                    with SandboxFolder() as folder:
                        retrieve_files_from_list(calc, t, folder,
                                                 retrieve_list)
                        # Here I retrieved everything; now I store them inside the calculation
                        retrieved_files.replace_with_folder(folder.abspath,
                                                            overwrite=True)

                    # Second, retrieve the singlefiles
                    with SandboxFolder() as folder:
                        singlefile_list = []
                        for (linkname, subclassname,
                             filename) in retrieve_singlefile_list:
                            execlogger.debug(
                                "[retrieval of calc {}] Trying "
                                "to retrieve remote singlefile '{}'".format(
                                    calc.pk, filename),
                                extra=logger_extra)
                            localfilename = os.path.join(
                                folder.abspath, os.path.split(filename)[1])
                            t.get(filename, localfilename,
                                  ignore_nonexisting=True)
                            singlefile_list.append(
                                (linkname, subclassname, localfilename))

                        # ignore files that have not been retrieved
                        singlefile_list = [
                            i for i in singlefile_list if os.path.exists(i[2])
                        ]

                        # after retrieving from the cluster, I create the objects
                        singlefiles = []
                        for (linkname, subclassname,
                             filename) in singlefile_list:
                            SinglefileSubclass = DataFactory(subclassname)
                            singlefile = SinglefileSubclass()
                            singlefile.set_file(filename)
                            singlefile.add_link_from(calc, label=linkname,
                                                     link_type=LinkType.CREATE)
                            singlefiles.append(singlefile)

                    # Retrieve the temporary files in a separate temporary folder if any files were
                    # specified in the 'retrieve_temporary_list' key
                    if retrieve_temporary_list:
                        retrieved_temporary_folder = FolderData()
                        with SandboxFolder() as folder:
                            retrieve_files_from_list(calc, t, folder,
                                                     retrieve_temporary_list)
                            retrieved_temporary_folder.replace_with_folder(
                                folder.abspath, overwrite=True)

                        # Log the files that were retrieved in the temporary folder
                        for entry in retrieved_temporary_folder.get_folder_list():
                            execlogger.debug(
                                "[retrieval of calc {}] Retrieved temporary file or folder '{}'"
                                .format(calc.pk, entry), extra=logger_extra)
                    else:
                        retrieved_temporary_folder = None

                    # Finally, store the retrieved_files node. The retrieved_temporary_folder node
                    # is explicitly not stored, but will just be passed to the parser.parse_from calc call
                    execlogger.debug(
                        "[retrieval of calc {}] Storing retrieved_files={}".format(
                            calc.pk, retrieved_files.dbnode.pk),
                        extra=logger_extra)
                    retrieved_files.store()
                    for fil in singlefiles:
                        execlogger.debug(
                            "[retrieval of calc {}] Storing retrieved_singlefile={}"
                            .format(calc.pk, fil.dbnode.pk),
                            extra=logger_extra)
                        fil.store()

                    # If I was the one retrieving, I should also be the only one parsing! I do not check
                    calc._set_state(calc_states.PARSING)

                    Parser = calc.get_parserclass()
                    # If no parser is set, the calculation is successful
                    successful = True
                    if Parser is not None:
                        parser = Parser(calc)
                        successful, new_nodes_tuple = parser.parse_from_calc(
                            retrieved_temporary_folder)
                        for label, n in new_nodes_tuple:
                            n.add_link_from(calc, label=label,
                                            link_type=LinkType.CREATE)
                            n.store()

                    if successful:
                        try:
                            calc._set_state(calc_states.FINISHED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                    else:
                        try:
                            calc._set_state(calc_states.FAILED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                        execlogger.error(
                            "[parsing of calc {}] "
                            "The parser returned an error, but it should have "
                            "created an output node with some partial results "
                            "and warnings. Check there for more information on "
                            "the problem".format(calc.pk), extra=logger_extra)
                    retrieved.append(calc)
                except Exception:
                    # Record the failure with a full traceback, set the
                    # matching failure state, and re-raise.
                    import traceback
                    tb = traceback.format_exc()
                    newextradict = logger_extra.copy()
                    newextradict['full_traceback'] = tb
                    if calc.get_state() == calc_states.PARSING:
                        execlogger.error("Error parsing calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        # TODO: add a 'comment' to the calculation
                        try:
                            calc._set_state(calc_states.PARSINGFAILED)
                        except ModificationNotAllowed:
                            pass
                    else:
                        execlogger.error("Error retrieving calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        try:
                            calc._set_state(calc_states.RETRIEVALFAILED)
                        except ModificationNotAllowed:
                            pass
                    raise

    return retrieved