Ejemplo n.º 1
0
    def test_cif_structure_roundtrip(self):
        from aiida.tools.dbexporters.tcod import export_cif, export_values
        from aiida.orm import Code
        from aiida.orm import JobCalculation
        from aiida.orm.data.cif import CifData
        from aiida.orm.data.parameter import ParameterData
        from aiida.orm.data.upf import UpfData
        from aiida.orm.data.folder import FolderData
        from aiida.common.folders import SandboxFolder
        from aiida.common.datastructures import calc_states
        import tempfile

        with tempfile.NamedTemporaryFile() as f:
            f.write('''
                data_test
                _cell_length_a    10
                _cell_length_b    10
                _cell_length_c    10
                _cell_angle_alpha 90
                _cell_angle_beta  90
                _cell_angle_gamma 90
                loop_
                _atom_site_label
                _atom_site_fract_x
                _atom_site_fract_y
                _atom_site_fract_z
                C 0 0 0
                O 0.5 0.5 0.5
            ''')
            f.flush()
            a = CifData(file=f.name)

        c = a._get_aiida_structure()
        c.store()
        pd = ParameterData()

        code = Code(local_executable='test.sh')
        with tempfile.NamedTemporaryFile() as f:
            f.write("#/bin/bash\n\necho test run\n")
            f.flush()
            code.add_path(f.name, 'test.sh')

        code.store()

        calc = JobCalculation(computer=self.computer)
        calc.set_resources({'num_machines': 1, 'num_mpiprocs_per_machine': 1})
        calc.add_link_from(code, "code")
        calc.set_environment_variables({
            'PATH': '/dev/null',
            'USER': '******'
        })

        with tempfile.NamedTemporaryFile(prefix="Fe") as f:
            f.write("<UPF version=\"2.0.1\">\nelement=\"Fe\"\n")
            f.flush()
            upf = UpfData(file=f.name)
            upf.store()
            calc.add_link_from(upf, "upf")

        with tempfile.NamedTemporaryFile() as f:
            f.write("data_test")
            f.flush()
            cif = CifData(file=f.name)
            cif.store()
            calc.add_link_from(cif, "cif")

        calc.store()
        calc._set_state(calc_states.SUBMITTING)
        with SandboxFolder() as f:
            calc._store_raw_input_folder(f.abspath)

        fd = FolderData()
        with open(
                fd._get_folder_pathsubfolder.get_abs_path(
                    calc._SCHED_OUTPUT_FILE), 'w') as f:
            f.write("standard output")
            f.flush()

        with open(
                fd._get_folder_pathsubfolder.get_abs_path(
                    calc._SCHED_ERROR_FILE), 'w') as f:
            f.write("standard error")
            f.flush()

        fd.store()
        fd.add_link_from(calc, calc._get_linkname_retrieved(), LinkType.CREATE)

        pd.add_link_from(calc, "calc", LinkType.CREATE)
        pd.store()

        with self.assertRaises(ValueError):
            export_cif(c, parameters=pd)

        c.add_link_from(calc, "calc", LinkType.CREATE)
        export_cif(c, parameters=pd)

        values = export_values(c, parameters=pd)
        values = values['0']

        self.assertEquals(values['_tcod_computation_environment'],
                          ['PATH=/dev/null\nUSER=unknown'])
        self.assertEquals(values['_tcod_computation_command'],
                          ['cd 1; ./_aiidasubmit.sh'])
Ejemplo n.º 2
0
def retrieve_calculation(calculation, transport, retrieved_temporary_folder):
    """
    Retrieve all the files of a completed job calculation using the given transport.

    If the job defined anything in the `retrieve_temporary_list`, those entries will be stored in the
    `retrieved_temporary_folder`. The caller is responsible for creating and destroying this folder.

    :param calculation: the instance of JobCalculation to update.
    :param transport: an already opened transport to use for the retrieval.
    :param retrieved_temporary_folder: the absolute path to a directory in which to store the files
        listed, if any, in the `retrieved_temporary_folder` of the jobs CalcInfo
    """
    logger_extra = get_dblogger_extra(calculation)

    execlogger.debug("Retrieving calc {}".format(calculation.pk),
                     extra=logger_extra)
    workdir = calculation._get_remote_workdir()

    execlogger.debug("[retrieval of calc {}] chdir {}".format(
        calculation.pk, workdir),
                     extra=logger_extra)

    # Create the FolderData node to attach everything to
    retrieved_files = FolderData()
    retrieved_files.add_link_from(calculation,
                                  label=calculation._get_linkname_retrieved(),
                                  link_type=LinkType.CREATE)

    with transport:
        transport.chdir(workdir)

        # First, retrieve the files of folderdata
        retrieve_list = calculation._get_retrieve_list()
        retrieve_temporary_list = calculation._get_retrieve_temporary_list()
        retrieve_singlefile_list = calculation._get_retrieve_singlefile_list()

        with SandboxFolder() as folder:
            retrieve_files_from_list(calculation, transport, folder.abspath,
                                     retrieve_list)
            # Here I retrieved everything; now I store them inside the calculation
            retrieved_files.replace_with_folder(folder.abspath, overwrite=True)

        # Second, retrieve the singlefiles
        with SandboxFolder() as folder:
            _retrieve_singlefiles(calculation, transport, folder,
                                  retrieve_singlefile_list, logger_extra)

        # Retrieve the temporary files in the retrieved_temporary_folder if any files were
        # specified in the 'retrieve_temporary_list' key
        if retrieve_temporary_list:
            retrieve_files_from_list(calculation, transport,
                                     retrieved_temporary_folder,
                                     retrieve_temporary_list)

            # Log the files that were retrieved in the temporary folder
            for filename in os.listdir(retrieved_temporary_folder):
                execlogger.debug(
                    "[retrieval of calc {}] Retrieved temporary file or folder '{}'"
                    .format(calculation.pk, filename),
                    extra=logger_extra)

        # Store everything
        execlogger.debug("[retrieval of calc {}] "
                         "Storing retrieved_files={}".format(
                             calculation.pk, retrieved_files.dbnode.pk),
                         extra=logger_extra)
        retrieved_files.store()
Ejemplo n.º 3
0
def retrieve_computed_for_authinfo(authinfo):
    from aiida.orm import JobCalculation
    from aiida.common.folders import SandboxFolder
    from aiida.orm.data.folder import FolderData
    from aiida.utils.logger import get_dblogger_extra
    from aiida.orm import DataFactory

    import os

    if not authinfo.enabled:
        return

    calcs_to_retrieve = list(
        JobCalculation._get_all_with_state(state=calc_states.COMPUTED,
                                           computer=authinfo.dbcomputer,
                                           user=authinfo.aiidauser))
    retrieved = []

    # I avoid to open an ssh connection if there are no
    # calcs with state not COMPUTED
    if len(calcs_to_retrieve):

        # Open connection
        with authinfo.get_transport() as t:
            for calc in calcs_to_retrieve:
                logger_extra = get_dblogger_extra(calc)
                t._set_logger_extra(logger_extra)

                try:
                    calc._set_state(calc_states.RETRIEVING)
                except ModificationNotAllowed:
                    # Someone else has already started to retrieve it,
                    # just log and continue
                    execlogger.debug("Attempting to retrieve more than once "
                                     "calculation {}: skipping!".format(
                                         calc.pk),
                                     extra=logger_extra)
                    continue  # with the next calculation to retrieve
                try:
                    execlogger.debug("Retrieving calc {}".format(calc.pk),
                                     extra=logger_extra)
                    workdir = calc._get_remote_workdir()
                    retrieve_list = calc._get_retrieve_list()
                    retrieve_singlefile_list = calc._get_retrieve_singlefile_list(
                    )
                    execlogger.debug("[retrieval of calc {}] "
                                     "chdir {}".format(calc.pk, workdir),
                                     extra=logger_extra)
                    t.chdir(workdir)

                    retrieved_files = FolderData()
                    retrieved_files.add_link_from(
                        calc,
                        label=calc._get_linkname_retrieved(),
                        link_type=LinkType.CREATE)

                    # First, retrieve the files of folderdata
                    with SandboxFolder() as folder:
                        for item in retrieve_list:
                            # I have two possibilities:
                            # * item is a string
                            # * or is a list
                            # then I have other two possibilities:
                            # * there are file patterns
                            # * or not
                            # First decide the name of the files
                            if isinstance(item, list):
                                tmp_rname, tmp_lname, depth = item
                                # if there are more than one file I do something differently
                                if t.has_magic(tmp_rname):
                                    remote_names = t.glob(tmp_rname)
                                    local_names = []
                                    for rem in remote_names:
                                        to_append = rem.split(
                                            os.path.sep
                                        )[-depth:] if depth > 0 else []
                                        local_names.append(
                                            os.path.sep.join([tmp_lname] +
                                                             to_append))
                                else:
                                    remote_names = [tmp_rname]
                                    to_append = remote_names.split(
                                        os.path.sep
                                    )[-depth:] if depth > 0 else []
                                    local_names = [
                                        os.path.sep.join([tmp_lname] +
                                                         to_append)
                                    ]
                                if depth > 1:  # create directories in the folder, if needed
                                    for this_local_file in local_names:
                                        new_folder = os.path.join(
                                            folder.abspath,
                                            os.path.split(this_local_file)[0])
                                        if not os.path.exists(new_folder):
                                            os.makedirs(new_folder)
                            else:  # it is a string
                                if t.has_magic(item):
                                    remote_names = t.glob(item)
                                    local_names = [
                                        os.path.split(rem)[1]
                                        for rem in remote_names
                                    ]
                                else:
                                    remote_names = [item]
                                    local_names = [os.path.split(item)[1]]

                            for rem, loc in zip(remote_names, local_names):
                                execlogger.debug(
                                    "[retrieval of calc {}] "
                                    "Trying to retrieve remote item '{}'".
                                    format(calc.pk, rem),
                                    extra=logger_extra)
                                t.get(rem,
                                      os.path.join(folder.abspath, loc),
                                      ignore_nonexisting=True)

                        # Here I retrieved everything;
                        # now I store them inside the calculation
                        retrieved_files.replace_with_folder(folder.abspath,
                                                            overwrite=True)

                    # Second, retrieve the singlefiles
                    with SandboxFolder() as folder:
                        singlefile_list = []
                        for (linkname, subclassname,
                             filename) in retrieve_singlefile_list:
                            execlogger.debug(
                                "[retrieval of calc {}] Trying "
                                "to retrieve remote singlefile '{}'".format(
                                    calc.pk, filename),
                                extra=logger_extra)
                            localfilename = os.path.join(
                                folder.abspath,
                                os.path.split(filename)[1])
                            t.get(filename,
                                  localfilename,
                                  ignore_nonexisting=True)
                            singlefile_list.append(
                                (linkname, subclassname, localfilename))

                        # ignore files that have not been retrieved
                        singlefile_list = [
                            i for i in singlefile_list if os.path.exists(i[2])
                        ]

                        # after retrieving from the cluster, I create the objects
                        singlefiles = []
                        for (linkname, subclassname,
                             filename) in singlefile_list:
                            SinglefileSubclass = DataFactory(subclassname)
                            singlefile = SinglefileSubclass()
                            singlefile.set_file(filename)
                            singlefile.add_link_from(calc,
                                                     label=linkname,
                                                     link_type=LinkType.CREATE)
                            singlefiles.append(singlefile)

                    # Finally, store
                    execlogger.debug("[retrieval of calc {}] "
                                     "Storing retrieved_files={}".format(
                                         calc.pk, retrieved_files.dbnode.pk),
                                     extra=logger_extra)
                    retrieved_files.store()
                    for fil in singlefiles:
                        execlogger.debug(
                            "[retrieval of calc {}] "
                            "Storing retrieved_singlefile={}".format(
                                calc.pk, fil.dbnode.pk),
                            extra=logger_extra)
                        fil.store()

                    # If I was the one retrieving, I should also be the only
                    # one parsing! I do not check
                    calc._set_state(calc_states.PARSING)

                    Parser = calc.get_parserclass()
                    # If no parser is set, the calculation is successful
                    successful = True
                    if Parser is not None:
                        # TODO: parse here
                        parser = Parser(calc)
                        successful, new_nodes_tuple = parser.parse_from_calc()

                        for label, n in new_nodes_tuple:
                            n.add_link_from(calc,
                                            label=label,
                                            link_type=LinkType.CREATE)
                            n.store()

                    if successful:
                        try:
                            calc._set_state(calc_states.FINISHED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                    else:
                        try:
                            calc._set_state(calc_states.FAILED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                        execlogger.error(
                            "[parsing of calc {}] "
                            "The parser returned an error, but it should have "
                            "created an output node with some partial results "
                            "and warnings. Check there for more information on "
                            "the problem".format(calc.pk),
                            extra=logger_extra)
                    retrieved.append(calc)
                except Exception:
                    import traceback

                    tb = traceback.format_exc()
                    newextradict = logger_extra.copy()
                    newextradict['full_traceback'] = tb
                    if calc.get_state() == calc_states.PARSING:
                        execlogger.error("Error parsing calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        # TODO: add a 'comment' to the calculation
                        try:
                            calc._set_state(calc_states.PARSINGFAILED)
                        except ModificationNotAllowed:
                            pass
                    else:
                        execlogger.error("Error retrieving calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        try:
                            calc._set_state(calc_states.RETRIEVALFAILED)
                        except ModificationNotAllowed:
                            pass
                        raise

    return retrieved
Ejemplo n.º 4
0
    def test_complex_graph_import_export(self):
        """
        This test checks that a small and bit complex graph can be correctly
        exported and imported.

        It will create the graph, store it to the database, export it to a file
        and import it. In the end it will check if the initial nodes are present
        at the imported graph.
        """
        import tempfile
        import shutil
        import os

        from aiida.orm.calculation.job import JobCalculation
        from aiida.orm.data.folder import FolderData
        from aiida.orm.data.parameter import ParameterData
        from aiida.orm.data.remote import RemoteData
        from aiida.common.links import LinkType
        from aiida.orm.importexport import export, import_data
        from aiida.orm.utils import load_node
        from aiida.common.exceptions import NotExistent

        temp_folder = tempfile.mkdtemp()
        try:
            calc1 = JobCalculation()
            calc1.set_computer(self.computer)
            calc1.set_resources({
                "num_machines": 1,
                "num_mpiprocs_per_machine": 1
            })
            calc1.label = "calc1"
            calc1.store()
            calc1._set_state(u'RETRIEVING')

            pd1 = ParameterData()
            pd1.label = "pd1"
            pd1.store()

            pd2 = ParameterData()
            pd2.label = "pd2"
            pd2.store()

            rd1 = RemoteData()
            rd1.label = "rd1"
            rd1.set_remote_path("/x/y.py")
            rd1.set_computer(self.computer)
            rd1.store()
            rd1.add_link_from(calc1, link_type=LinkType.CREATE)

            calc2 = JobCalculation()
            calc2.set_computer(self.computer)
            calc2.set_resources({
                "num_machines": 1,
                "num_mpiprocs_per_machine": 1
            })
            calc2.label = "calc2"
            calc2.store()
            calc2.add_link_from(pd1, link_type=LinkType.INPUT)
            calc2.add_link_from(pd2, link_type=LinkType.INPUT)
            calc2.add_link_from(rd1, link_type=LinkType.INPUT)
            calc2._set_state(u'SUBMITTING')

            fd1 = FolderData()
            fd1.label = "fd1"
            fd1.store()
            fd1.add_link_from(calc2, link_type=LinkType.CREATE)

            node_uuids_labels = {
                calc1.uuid: calc1.label,
                pd1.uuid: pd1.label,
                pd2.uuid: pd2.label,
                rd1.uuid: rd1.label,
                calc2.uuid: calc2.label,
                fd1.uuid: fd1.label
            }

            filename = os.path.join(temp_folder, "export.tar.gz")
            export([fd1.dbnode], outfile=filename, silent=True)

            self.clean_db()

            import_data(filename, silent=True, ignore_unknown_nodes=True)

            for uuid, label in node_uuids_labels.iteritems():
                try:
                    load_node(uuid)
                except NotExistent:
                    self.fail("Node with UUID {} and label {} was not "
                              "found.".format(uuid, label))

        finally:
            # Deleting the created temporary folder
            shutil.rmtree(temp_folder, ignore_errors=True)
Ejemplo n.º 5
0
def retrieve_computed_for_authinfo(authinfo):
    from aiida.orm import JobCalculation
    from aiida.common.folders import SandboxFolder
    from aiida.orm.data.folder import FolderData
    from aiida.common.log import get_dblogger_extra
    from aiida.orm import DataFactory

    from aiida.backends.utils import QueryFactory

    import os

    if not authinfo.enabled:
        return

    qmanager = QueryFactory()()
    # I create a unique set of pairs (computer, aiidauser)
    calcs_to_retrieve = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.COMPUTED,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser)

    retrieved = []

    # I avoid to open an ssh connection if there are no
    # calcs with state not COMPUTED
    if len(calcs_to_retrieve):

        # Open connection
        with authinfo.get_transport() as t:
            for calc in calcs_to_retrieve:
                logger_extra = get_dblogger_extra(calc)
                t._set_logger_extra(logger_extra)

                try:
                    calc._set_state(calc_states.RETRIEVING)
                except ModificationNotAllowed:
                    # Someone else has already started to retrieve it,
                    # just log and continue
                    execlogger.debug("Attempting to retrieve more than once "
                                     "calculation {}: skipping!".format(
                                         calc.pk),
                                     extra=logger_extra)
                    continue  # with the next calculation to retrieve
                try:
                    execlogger.debug("Retrieving calc {}".format(calc.pk),
                                     extra=logger_extra)
                    workdir = calc._get_remote_workdir()
                    retrieve_list = calc._get_retrieve_list()
                    retrieve_temporary_list = calc._get_retrieve_temporary_list(
                    )
                    retrieve_singlefile_list = calc._get_retrieve_singlefile_list(
                    )
                    execlogger.debug("[retrieval of calc {}] "
                                     "chdir {}".format(calc.pk, workdir),
                                     extra=logger_extra)
                    t.chdir(workdir)

                    retrieved_files = FolderData()
                    retrieved_files.add_link_from(
                        calc,
                        label=calc._get_linkname_retrieved(),
                        link_type=LinkType.CREATE)

                    # First, retrieve the files of folderdata
                    with SandboxFolder() as folder:
                        retrieve_files_from_list(calc, t, folder,
                                                 retrieve_list)
                        # Here I retrieved everything; now I store them inside the calculation
                        retrieved_files.replace_with_folder(folder.abspath,
                                                            overwrite=True)

                    # Second, retrieve the singlefiles
                    with SandboxFolder() as folder:
                        singlefile_list = []
                        for (linkname, subclassname,
                             filename) in retrieve_singlefile_list:
                            execlogger.debug(
                                "[retrieval of calc {}] Trying "
                                "to retrieve remote singlefile '{}'".format(
                                    calc.pk, filename),
                                extra=logger_extra)
                            localfilename = os.path.join(
                                folder.abspath,
                                os.path.split(filename)[1])
                            t.get(filename,
                                  localfilename,
                                  ignore_nonexisting=True)
                            singlefile_list.append(
                                (linkname, subclassname, localfilename))

                        # ignore files that have not been retrieved
                        singlefile_list = [
                            i for i in singlefile_list if os.path.exists(i[2])
                        ]

                        # after retrieving from the cluster, I create the objects
                        singlefiles = []
                        for (linkname, subclassname,
                             filename) in singlefile_list:
                            SinglefileSubclass = DataFactory(subclassname)
                            singlefile = SinglefileSubclass()
                            singlefile.set_file(filename)
                            singlefile.add_link_from(calc,
                                                     label=linkname,
                                                     link_type=LinkType.CREATE)
                            singlefiles.append(singlefile)

                    # Retrieve the temporary files in a separate temporary folder if any files were
                    # specified in the 'retrieve_temporary_list' key
                    if retrieve_temporary_list:
                        retrieved_temporary_folder = FolderData()
                        with SandboxFolder() as folder:
                            retrieve_files_from_list(calc, t, folder,
                                                     retrieve_temporary_list)
                            retrieved_temporary_folder.replace_with_folder(
                                folder.abspath, overwrite=True)

                        # Log the files that were retrieved in the temporary folder
                        for entry in retrieved_temporary_folder.get_folder_list(
                        ):
                            execlogger.debug(
                                "[retrieval of calc {}] Retrieved temporary file or folder '{}'"
                                .format(calc.pk, entry),
                                extra=logger_extra)
                    else:
                        retrieved_temporary_folder = None

                    # Finally, store the retrieved_files node. The retrieved_temporary_folder node
                    # is explicitly not stored, but will just be passed to the parser.parse_from calc call
                    execlogger.debug(
                        "[retrieval of calc {}] Storing retrieved_files={}".
                        format(calc.pk, retrieved_files.dbnode.pk),
                        extra=logger_extra)
                    retrieved_files.store()

                    for fil in singlefiles:
                        execlogger.debug(
                            "[retrieval of calc {}] Storing retrieved_singlefile={}"
                            .format(calc.pk, fil.dbnode.pk),
                            extra=logger_extra)
                        fil.store()

                    # If I was the one retrieving, I should also be the only one parsing! I do not check
                    calc._set_state(calc_states.PARSING)

                    Parser = calc.get_parserclass()
                    # If no parser is set, the calculation is successful
                    successful = True
                    if Parser is not None:
                        parser = Parser(calc)
                        successful, new_nodes_tuple = parser.parse_from_calc(
                            retrieved_temporary_folder)

                        for label, n in new_nodes_tuple:
                            n.add_link_from(calc,
                                            label=label,
                                            link_type=LinkType.CREATE)
                            n.store()

                    if successful:
                        try:
                            calc._set_state(calc_states.FINISHED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                    else:
                        try:
                            calc._set_state(calc_states.FAILED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                        execlogger.error(
                            "[parsing of calc {}] "
                            "The parser returned an error, but it should have "
                            "created an output node with some partial results "
                            "and warnings. Check there for more information on "
                            "the problem".format(calc.pk),
                            extra=logger_extra)
                    retrieved.append(calc)
                except Exception:
                    import traceback

                    tb = traceback.format_exc()
                    newextradict = logger_extra.copy()
                    newextradict['full_traceback'] = tb
                    if calc.get_state() == calc_states.PARSING:
                        execlogger.error("Error parsing calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        # TODO: add a 'comment' to the calculation
                        try:
                            calc._set_state(calc_states.PARSINGFAILED)
                        except ModificationNotAllowed:
                            pass
                    else:
                        execlogger.error("Error retrieving calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        try:
                            calc._set_state(calc_states.RETRIEVALFAILED)
                        except ModificationNotAllowed:
                            pass
                        raise

    return retrieved