Exemple #1
0
    def _parser_function(self):
        """
        Parses the XDATCAR using custom function.
        """

        parser_warnings = {}  # return non-critical errors

        timestep = self._calc.inp.incar.dict.NSW * 1e-3  # In picoseconds

        # extract data
        try:
            step_ids, positions, time, cells, symbols = read_VASP_XDATCAR(
                self._out_folder.get_abs_path('XDATCAR'), timestep)
        except:
            print('Error parsing XDATCAR')

        #  construct proper trajectory data format
        trajectory_data = TrajectoryData()
        try:
            nodes_list = []
            trajectory_data.set_trajectory(step_ids,
                                           cells,
                                           symbols,
                                           positions,
                                           times=time)
            nodes_list.append(('trajectory_data', trajectory_data))
        except Exception, e:
            msg = ("Failed to create AiiDA data structures "
                   "(ParameterData/ArrrayData) from parsed data, "
                   "with error message:\n>> {}".format(e))
            raise OutputParsingError(msg)
Exemple #2
0
    def _get_output_nodes(self, output_path, error_path):
        """
        Extracts output nodes from the standard output and standard error
        files.
        """
        from aiida.orm.data.array.trajectory import TrajectoryData
        import re

        state = None
        step = None
        scale = None
        with open(output_path) as f:
            lines = [x.strip('\n') for x in f.readlines()]

        result_dict = dict()
        trajectory = None
        for line in lines:
            if state is None and re.match('^\s*NWChem SCF Module\s*$', line):
                state = 'nwchem-scf-module'
                continue
            if state is None and re.match(
                    '^\s*NWChem Geometry Optimization\s*$', line):
                state = 'nwchem-geometry-optimisation'
                trajectory = TrajectoryData()
                continue
            if state == 'nwchem-scf-module' and re.match(
                    '^\s*Final RHF \s*results\s*$', line):
                state = 'final-rhf-results'
                continue
            if re.match('^\s*\-*\s*$', line):
                continue
            if state == 'final-rhf-results':
                result = re.match('^\s*([^=]+?)\s*=\s*([\-\d\.]+)$', line)
                if result:
                    key = re.sub('[^a-zA-Z0-9]+', '_', result.group(1).lower())
                    result_dict[key] = result.group(2)
                else:
                    state = 'nwchem-scf-module'
            if state == 'nwchem-geometry-optimisation' and re.match(
                    '^\s*Step\s+\d+\s*$', line):
                result = re.match('^\s*Step\s+(\d+)\s*$', line)
                step = result.group(1)
                continue
            if state == 'nwchem-geometry-optimisation' and \
                re.match('^\s*Output coordinates in a.u.',line):
                state = 'nwchem-geometry-optimisation-coordinates'
                result = re.match('scale by \s(*[\-\d\.]+)', line)
                scale = result.group(1)
                continue
        return [('parameters', ParameterData(dict=result_dict))]
Exemple #3
0
    def _get_output_nodes(self, output_path, error_path):
        """
        Extracts output nodes from the standard output and standard error
        files.
        """
        from pymatgen.io.nwchem import NwOutput
        from aiida.orm.data.structure import StructureData
        from aiida.orm.data.array.trajectory import TrajectoryData

        ret_dict = []
        nwo = NwOutput(output_path)
        for out in nwo.data:
            molecules = out.pop('molecules', None)
            structures = out.pop('structures', None)
            if molecules:
                structlist = [
                    StructureData(pymatgen_molecule=m) for m in molecules
                ]
                ret_dict.append(
                    ('trajectory', TrajectoryData(structurelist=structlist)))
            if structures:
                structlist = [
                    StructureData(pymatgen_structure=s) for s in structures
                ]
                ret_dict.append(
                    ('trajectory', TrajectoryData(structurelist=structlist)))
            ret_dict.append(('output', ParameterData(dict=out)))

        # Since ParameterData rewrites it's properties (using _set_attr())
        # with keys from the supplied dictionary, ``source`` has to be
        # moved to another key. See issue #9 for details:
        # (https://bitbucket.org/epfl_theos/aiida_epfl/issues/9)
        nwo.job_info['program_source'] = nwo.job_info.pop('source', None)
        ret_dict.append(('job_info', ParameterData(dict=nwo.job_info)))

        return ret_dict
Exemple #4
0
    def create_trajectory_data(cls, cmd_to_nodeid_map,
                               cmd_to_nodeid_map_for_groups,
                               cmd_to_nodeid_map_for_nuser, group, new_user):

        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.cmdline.commands.data import _Trajectory
        import numpy

        # Create the Trajectory data nodes
        tjn1 = TrajectoryData()

        # I create sample data
        stepids = numpy.array([60, 70])
        times = stepids * 0.01
        cells = numpy.array([[[2., 0., 0.], [0., 2., 0.], [0., 0., 2.]],
                             [[3., 0., 0.], [0., 3., 0.], [0., 0., 3.]]])
        symbols = numpy.array(['H', 'O', 'C'])
        positions = numpy.array([[[0., 0., 0.], [0.5, 0.5, 0.5],
                                  [1.5, 1.5, 1.5]],
                                 [[0., 0., 0.], [0.5, 0.5, 0.5],
                                  [1.5, 1.5, 1.5]]])
        velocities = numpy.array([[[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]],
                                  [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5],
                                   [-0.5, -0.5, -0.5]]])

        # I set the node
        tjn1.set_trajectory(stepids=stepids,
                            cells=cells,
                            symbols=symbols,
                            positions=positions,
                            times=times,
                            velocities=velocities)
        tjn1.store()

        tjn2 = TrajectoryData()
        tjn2.set_trajectory(stepids=stepids,
                            cells=cells,
                            symbols=symbols,
                            positions=positions,
                            times=times,
                            velocities=velocities)
        tjn2.store()

        # Keep track of the created objects
        cmd_to_nodeid_map[_Trajectory] = [tjn1.id, tjn2.id]

        # Add the second Trajectory data to the group
        group.add_nodes([tjn2])
        # Keep track of the id of the node that you added to the group
        cmd_to_nodeid_map_for_groups[_Trajectory] = tjn2.id

        # Create a trajectory data that belongs to another user
        tjn3 = TrajectoryData()
        tjn3.set_trajectory(stepids=stepids,
                            cells=cells,
                            symbols=symbols,
                            positions=positions,
                            times=times,
                            velocities=velocities)
        tjn3.dbnode.user = new_user._dbuser
        tjn3.store()

        # Put it is to the right map
        cmd_to_nodeid_map_for_nuser[_Trajectory] = [tjn3.id]
Exemple #5
0
    def test_export_trajectory(self):
        from aiida.orm.data.structure import StructureData
        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.tools.dbexporters.tcod import export_values

        cells = [[[
            2.,
            0.,
            0.,
        ], [
            0.,
            2.,
            0.,
        ], [
            0.,
            0.,
            2.,
        ]], [[
            3.,
            0.,
            0.,
        ], [
            0.,
            3.,
            0.,
        ], [
            0.,
            0.,
            3.,
        ]]]
        symbols = [['H', 'O', 'C'], ['H', 'O', 'C']]
        positions = [[[0., 0., 0.], [0.5, 0.5, 0.5], [1.5, 1.5, 1.5]],
                     [[0., 0., 0.], [0.75, 0.75, 0.75], [1.25, 1.25, 1.25]]]
        structurelist = []
        for i in range(0, 2):
            struct = StructureData(cell=cells[i])
            for j, symbol in enumerate(symbols[i]):
                struct.append_atom(symbols=symbol, position=positions[i][j])
            structurelist.append(struct)

        td = TrajectoryData(structurelist=structurelist)

        with self.assertRaises(ValueError):
            # Trajectory index is not specified
            v = export_values(td)

        expected_tags = [
            '_atom_site_fract_x', '_atom_site_fract_y', '_atom_site_fract_z',
            '_atom_site_label', '_atom_site_type_symbol',
            '_audit_conform_dict_location', '_audit_conform_dict_name',
            '_audit_conform_dict_version', '_audit_creation_method',
            '_cell_angle_alpha', '_cell_angle_beta', '_cell_angle_gamma',
            '_cell_length_a', '_cell_length_b', '_cell_length_c',
            '_chemical_formula_sum', '_symmetry_Int_Tables_number',
            '_symmetry_equiv_pos_as_xyz', '_symmetry_space_group_name_H-M',
            '_symmetry_space_group_name_Hall'
        ]

        tcod_file_tags = [
            '_tcod_content_encoding_id', '_tcod_content_encoding_layer_id',
            '_tcod_content_encoding_layer_type', '_tcod_file_URI',
            '_tcod_file_content_encoding', '_tcod_file_contents',
            '_tcod_file_id', '_tcod_file_md5sum', '_tcod_file_name',
            '_tcod_file_role', '_tcod_file_sha1sum'
        ]

        # Not stored and not to be stored:
        v = export_values(td, trajectory_index=1)
        self.assertEqual(sorted(v['0'].keys()), expected_tags)

        # Stored, but not expected to be stored:
        td = TrajectoryData(structurelist=structurelist)
        td.store()
        v = export_values(td, trajectory_index=1)
        self.assertEqual(sorted(v['0'].keys()), expected_tags + tcod_file_tags)

        # Not stored, but expected to be stored:
        td = TrajectoryData(structurelist=structurelist)
        v = export_values(td, trajectory_index=1, store=True)
        self.assertEqual(sorted(v['0'].keys()), expected_tags + tcod_file_tags)

        # Both stored and expected to be stored:
        td = TrajectoryData(structurelist=structurelist)
        td.store()
        v = export_values(td, trajectory_index=1, store=True)
        self.assertEqual(sorted(v['0'].keys()), expected_tags + tcod_file_tags)

        # Stored, but asked not to include DB dump:
        td = TrajectoryData(structurelist=structurelist)
        td.store()
        v = export_values(td, trajectory_index=1, dump_aiida_database=False)
        self.assertEqual(sorted(v['0'].keys()), expected_tags)
    def parse_with_retrieved(self, retrieved):
        """
        Receives in input a dictionary of retrieved nodes.
        Does all the logic here.
        """
        from aiida.common.exceptions import InvalidOperation
        import os
        import glob

        successful = True

        # check if I'm not to overwrite anything
        #state = self._calc.get_state()
        #if state != calc_states.PARSING:
        #    raise InvalidOperation("Calculation not in {} state"
        #                           .format(calc_states.PARSING) )

        # retrieve the input parameter
        calc_input = self._calc.inp.parameters

        # look for eventual flags of the parser
        try:
            parser_opts = self._calc.inp.settings.get_dict()[
                self.get_parser_settings_key()]
        except (AttributeError, KeyError):
            parser_opts = {}

        # load the input dictionary
        # TODO: pass this input_dict to the parser. It might need it.
        input_dict = self._calc.inp.parameters.get_dict()

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()
        # at least the stdout should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            self.logger.error("Standard output not found")
            successful = False
            return successful, ()
        # if there is something more, I note it down, so to call the raw parser
        # with the right options
        # look for xml
        has_xml = False
        if self._calc._DATAFILE_XML_BASENAME in list_of_files:
            has_xml = True
        # look for bands
        has_bands = False
        if glob.glob(os.path.join(out_folder.get_abs_path('.'), 'K*[0-9]')):
            # Note: assuming format of kpoints subfolder is K*[0-9]
            has_bands = True
            # TODO: maybe it can be more general than bands only?
        out_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._OUTPUT_FILE_NAME)
        xml_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._DATAFILE_XML_BASENAME)
        dir_with_bands = out_folder.get_abs_path('.')

        # call the raw parsing function
        parsing_args = [out_file, input_dict, parser_opts]
        if has_xml:
            parsing_args.append(xml_file)
        if has_bands:
            if not has_xml:
                self.logger.warning("Cannot parse bands if xml file not "
                                    "found")
            else:
                parsing_args.append(dir_with_bands)

        out_dict, trajectory_data, structure_data, raw_successful = parse_raw_output(
            *parsing_args)

        # if calculation was not considered failed already, use the new value
        successful = raw_successful if successful else successful

        new_nodes_list = []

        # I eventually save the new structure. structure_data is unnecessary after this
        in_struc = self._calc.get_inputs_dict()['structure']
        type_calc = input_dict['CONTROL']['calculation']
        struc = in_struc
        if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
            if 'cell' in structure_data.keys():
                struc = convert_qe2aiida_structure(structure_data,
                                                   input_structure=in_struc)
                new_nodes_list.append(
                    (self.get_linkname_outstructure(), struc))

        k_points_list = trajectory_data.pop('k_points', None)
        k_points_weights_list = trajectory_data.pop('k_points_weights', None)

        if k_points_list is not None:

            # build the kpoints object
            if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
                raise QEOutputParsingError(
                    'Error in kpoints units (should be cartesian)')
            # converting bands into a BandsData object (including the kpoints)

            kpoints_from_output = KpointsData()
            kpoints_from_output.set_cell_from_structure(struc)
            kpoints_from_output.set_kpoints(k_points_list,
                                            cartesian=True,
                                            weights=k_points_weights_list)
            kpoints_from_input = self._calc.inp.kpoints
            try:
                kpoints_from_input.get_kpoints()
            except AttributeError:
                new_nodes_list += [(self.get_linkname_out_kpoints(),
                                    kpoints_from_output)]

        # convert the dictionary into an AiiDA object
        output_params = ParameterData(dict=out_dict)
        # return it to the execmanager
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        if trajectory_data:
            import numpy
            from aiida.orm.data.array.trajectory import TrajectoryData
            from aiida.orm.data.array import ArrayData

            try:
                positions = numpy.array(
                    trajectory_data.pop('atomic_positions_relax'))
                try:
                    cells = numpy.array(
                        trajectory_data.pop('lattice_vectors_relax'))
                    # if KeyError, the MD was at fixed cell
                except KeyError:
                    cells = numpy.array([in_struc.cell] * len(positions))

                symbols = numpy.array(
                    [str(i.kind_name) for i in in_struc.sites])
                stepids = numpy.arange(
                    len(positions))  # a growing integer per step
                # I will insert time parsing when they fix their issues about time
                # printing (logic is broken if restart is on)

                traj = TrajectoryData()
                traj.set_trajectory(
                    stepids=stepids,
                    cells=cells,
                    symbols=symbols,
                    positions=positions,
                )
                for x in trajectory_data.iteritems():
                    traj.set_array(x[0], numpy.array(x[1]))
                # return it to the execmanager
                new_nodes_list.append(
                    (self.get_linkname_outtrajectory(), traj))

            except KeyError:  # forces in scf calculation (when outputed)
                arraydata = ArrayData()
                for x in trajectory_data.iteritems():
                    arraydata.set_array(x[0], numpy.array(x[1]))
                # return it to the execmanager
                new_nodes_list.append(
                    (self.get_linkname_outarray(), arraydata))

        return successful, new_nodes_list
Exemple #7
0
    def parse_with_retrieved(self, retrieved):
        """
        Receives in input a dictionary of retrieved nodes.
        Does all the logic here.
        """
        from aiida.common.exceptions import InvalidOperation
        import os, copy
        import numpy  # TrajectoryData also uses numpy arrays

        successful = True

        # check if I'm not to overwrite anything
        state = self._calc.get_state()
        if state != calc_states.PARSING:
            raise InvalidOperation("Calculation not in {} state"
                                   .format(calc_states.PARSING))

        # get the input structure
        input_structure = self._calc.inp.structure

        # load the input dictionary
        # TODO: pass this input_dict to the parser. It might need it.            
        input_dict = self._calc.inp.parameters.get_dict()

        # Check that the retrieved folder is there 
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()
        # at least the stdout should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            successful = False
            new_nodes_tuple = ()
            self.logger.error("Standard output not found")
            return successful, new_nodes_tuple

        # if there is something more, I note it down, so to call the raw parser
        # with the right options
        # look for xml
        out_file = out_folder.get_abs_path(self._calc._OUTPUT_FILE_NAME)

        xml_file = None
        if self._calc._DATAFILE_XML_BASENAME in list_of_files:
            xml_file = out_folder.get_abs_path(self._calc._DATAFILE_XML_BASENAME)

        xml_counter_file = None
        if self._calc._FILE_XML_PRINT_COUNTER in list_of_files:
            xml_counter_file = out_folder.get_abs_path(
                self._calc._FILE_XML_PRINT_COUNTER)

        parsing_args = [out_file, xml_file, xml_counter_file]

        # call the raw parsing function
        out_dict, raw_successful = parse_cp_raw_output(*parsing_args)

        successful = True if raw_successful else False

        # parse the trajectory. Units in Angstrom, picoseconds and eV.
        # append everthing in the temporary dictionary raw_trajectory
        expected_configs = None
        raw_trajectory = {}
        evp_keys = ['electronic_kinetic_energy', 'cell_temperature', 'ionic_temperature',
                    'scf_total_energy', 'enthalpy', 'enthalpy_plus_kinetic',
                    'energy_constant_motion', 'volume', 'pressure']
        pos_vel_keys = ['cells', 'positions', 'times', 'velocities']
        # set a default null values

        # Now prepare the reordering, as filex in the xml are  ordered
        reordering = self._generate_sites_ordering(out_dict['species'],
                                                   out_dict['atoms'])

        # =============== POSITIONS trajectory ============================
        try:
            with open(out_folder.get_abs_path(
                    '{}.pos'.format(self._calc._PREFIX))) as posfile:
                pos_data = [l.split() for l in posfile]
                # POSITIONS stored in angstrom
            traj_data = parse_cp_traj_stanzas(num_elements=out_dict['number_of_atoms'],
                                              splitlines=pos_data,
                                              prepend_name='positions_traj',
                                              rescale=bohr_to_ang)

            # here initialize the dictionary. If the parsing of positions fails, though, I don't have anything
            # out of the CP dynamics. Therefore, the calculation status is set to FAILED.
            raw_trajectory['positions_ordered'] = self._get_reordered_array(traj_data['positions_traj_data'],
                                                                            reordering)
            raw_trajectory['times'] = numpy.array(traj_data['positions_traj_times'])
        except IOError:
            out_dict['warnings'].append("Unable to open the POS file... skipping.")
            successful = False
        except Exception as e:
            out_dict['warnings'].append("Error parsing POS file ({}). Skipping file."
                                        .format(e.message))
            successful = False

        # =============== CELL trajectory ============================
        try:
            with open(os.path.join(out_folder.get_abs_path('.'),
                                   '{}.cel'.format(self._calc._PREFIX))) as celfile:
                cel_data = [l.split() for l in celfile]
            traj_data = parse_cp_traj_stanzas(num_elements=3,
                                              splitlines=cel_data,
                                              prepend_name='cell_traj',
                                              rescale=bohr_to_ang)
            raw_trajectory['cells'] = numpy.array(traj_data['cell_traj_data'])
        except IOError:
            out_dict['warnings'].append("Unable to open the CEL file... skipping.")
        except Exception as e:
            out_dict['warnings'].append("Error parsing CEL file ({}). Skipping file."
                                        .format(e.message))

        # =============== VELOCITIES trajectory ============================
        try:
            with open(os.path.join(out_folder.get_abs_path('.'),
                                   '{}.vel'.format(self._calc._PREFIX))) as velfile:
                vel_data = [l.split() for l in velfile]
            traj_data = parse_cp_traj_stanzas(num_elements=out_dict['number_of_atoms'],
                                              splitlines=vel_data,
                                              prepend_name='velocities_traj',
                                              rescale=bohr_to_ang / timeau_to_sec * 10 ** 12)  # velocities in ang/ps,
            raw_trajectory['velocities_ordered'] = self._get_reordered_array(traj_data['velocities_traj_data'],
                                                                             reordering)
        except IOError:
            out_dict['warnings'].append("Unable to open the VEL file... skipping.")
        except Exception as e:
            out_dict['warnings'].append("Error parsing VEL file ({}). Skipping file."
                                        .format(e.message))

        # =============== EVP trajectory ============================
        try:
            matrix = numpy.genfromtxt(os.path.join(out_folder.get_abs_path('.'),
                                                   '{}.evp'.format(self._calc._PREFIX)))
            # there might be a different format if the matrix has one row only
            try:
                matrix.shape[1]
            except IndexError:
                matrix = numpy.array(numpy.matrix(matrix))

            raw_trajectory['steps'] = numpy.array(matrix[:, 0], dtype=int)
            raw_trajectory['electronic_kinetic_energy'] = matrix[:, 1] * hartree_to_ev  # EKINC, eV
            raw_trajectory['cell_temperature'] = matrix[:, 2]  # TEMPH, K
            raw_trajectory['ionic_temperature'] = matrix[:, 3]  # TEMPP, K
            raw_trajectory['scf_total_energy'] = matrix[:, 4] * hartree_to_ev  # ETOT, eV
            raw_trajectory['enthalpy'] = matrix[:, 5] * hartree_to_ev  # ENTHAL, eV
            raw_trajectory['enthalpy_plus_kinetic'] = matrix[:, 6] * hartree_to_ev  # ECONS, eV
            raw_trajectory['energy_constant_motion'] = matrix[:, 7] * hartree_to_ev  # ECONT, eV
            raw_trajectory['volume'] = matrix[:, 8] * (bohr_to_ang ** 3)  # volume, angstrom^3
            raw_trajectory['pressure'] = matrix[:, 9]  # out_press, GPa
        except Exception as e:
            out_dict['warnings'].append("Error parsing EVP file ({}). Skipping file.".format(e.message))
        except IOError:
            out_dict['warnings'].append("Unable to open the EVP file... skipping.")

        # get the symbols from the input        
        # TODO: I should have kinds in TrajectoryData
        raw_trajectory['symbols'] = numpy.array([str(i.kind_name) for i in input_structure.sites])

        traj = TrajectoryData()
        traj.set_trajectory(stepids=raw_trajectory['steps'],
                            cells=raw_trajectory['cells'],
                            symbols=raw_trajectory['symbols'],
                            positions=raw_trajectory['positions_ordered'],
                            times=raw_trajectory['times'],
                            velocities=raw_trajectory['velocities_ordered'],
        )

        for this_name in evp_keys:
            traj.set_array(this_name, raw_trajectory[this_name])
        new_nodes_list = [(self.get_linkname_trajectory(), traj)]

        # convert the dictionary into an AiiDA object
        output_params = ParameterData(dict=out_dict)
        # save it into db
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        return successful, new_nodes_list
Exemple #8
0
    def parse_with_retrieved(self, retrieved):
        """
        Parse the output nodes for a PwCalculations from a dictionary of retrieved nodes.
        Two nodes that are expected are the default 'retrieved' FolderData node which will
        store the retrieved files permanently in the repository. The second required node
        is the unstored FolderData node with the temporary retrieved files, which should
        be passed under the key 'retrieved_temporary_folder_key' of the Parser class.

        :param retrieved: a dictionary of retrieved nodes
        """
        import os
        import numpy

        successful = True

        # Load the input dictionary
        parameters = self._calc.inp.parameters.get_dict()

        # Look for optional settings input node and potential 'parser_options' dictionary within it
        try:
            settings = self._calc.inp.settings.get_dict()
            parser_opts = settings[self.get_parser_settings_key()]
        except (AttributeError, KeyError):
            settings = {}
            parser_opts = {}

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # Verify that the retrieved_temporary_folder is within the arguments if temporary files were specified
        if self._calc._get_retrieve_temporary_list():
            try:
                temporary_folder = retrieved[self.retrieved_temporary_folder_key]
                dir_with_bands = temporary_folder.get_abs_path('.')
            except KeyError:
                self.logger.error('the {} was not passed as an argument'.format(self.retrieved_temporary_folder_key))
                return False, ()
        else:
            dir_with_bands = None

        list_of_files = out_folder.get_folder_list()

        # The stdout is required for parsing
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            self.logger.error("The standard output file '{}' was not found but is required".format(self._calc._OUTPUT_FILE_NAME))
            return False, ()

        # The xml file is required for parsing
        if not self._calc._DATAFILE_XML_BASENAME in list_of_files:
            self.logger.error("The xml output file '{}' was not found but is required".format(self._calc._DATAFILE_XML_BASENAME))
            successful = False
            xml_file = None
        else:
            xml_file = os.path.join(out_folder.get_abs_path('.'), self._calc._DATAFILE_XML_BASENAME)

        out_file = os.path.join(out_folder.get_abs_path('.'), self._calc._OUTPUT_FILE_NAME)

        # Call the raw parsing function
        parsing_args = [out_file, parameters, parser_opts, xml_file, dir_with_bands]
        out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(*parsing_args)

        # If calculation was not considered failed already, use the new value
        successful = raw_successful if successful else successful

        # The symmetry info has large arrays, that occupy most of the database.
        # turns out most of this is due to 64 matrices that are repeated over and over again.
        # therefore I map part of the results in a list of dictionaries wrote here once and for all
        # if the parser_opts has a key all_symmetries set to True, I don't reduce it
        all_symmetries = parser_opts.get('all_symmetries', False)
        if not all_symmetries:
            try:
                if 'symmetries' in out_dict.keys():
                    old_symmetries = out_dict['symmetries']
                    new_symmetries = []
                    for this_sym in old_symmetries:
                        name = this_sym['name']
                        index = None
                        for i,this in enumerate(self._possible_symmetries):
                            if name in this['name']:
                                index = i
                        if index is None:
                            self.logger.error("Symmetry {} not found".format(name))
                        new_dict = {}
                        # note: here I lose the information about equivalent
                        # ions and fractional_translation.
                        # They will be present with all_symmetries=True
                        new_dict['t_rev'] = this_sym['t_rev']
                        new_dict['symmetry_number'] = index
                        new_symmetries.append(new_dict)
                    out_dict['symmetries'] = new_symmetries # and overwrite the old one
            except KeyError: # no symmetries were parsed (failed case, likely)
                self.logger.error("No symmetries were found in output")

        new_nodes_list = []

        # I eventually save the new structure. structure_data is unnecessary after this
        in_struc = self._calc.get_inputs_dict()['structure']
        type_calc = parameters['CONTROL']['calculation']
        struc = in_struc
        if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
            if 'cell' in structure_data.keys():
                struc = convert_qe2aiida_structure(structure_data, input_structure=in_struc)
                new_nodes_list.append((self.get_linkname_outstructure(), struc))

        k_points_list = trajectory_data.pop('k_points', None)
        k_points_weights_list = trajectory_data.pop('k_points_weights', None)

        if k_points_list is not None:

            # Build the kpoints object
            if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
                raise QEOutputParsingError('Error in kpoints units (should be cartesian)')

            kpoints_from_output = KpointsData()
            kpoints_from_output.set_cell_from_structure(struc)
            kpoints_from_output.set_kpoints(k_points_list, cartesian=True, weights=k_points_weights_list)
            kpoints_from_input = self._calc.inp.kpoints

            if not bands_data:
                try:
                    kpoints_from_input.get_kpoints()
                except AttributeError:
                    new_nodes_list += [(self.get_linkname_out_kpoints(), kpoints_from_output)]

            # Converting bands into a BandsData object (including the kpoints)
            if bands_data:
                kpoints_for_bands = kpoints_from_output

                try:
                    kpoints_from_input.get_kpoints()
                    kpoints_for_bands.labels = kpoints_from_input.labels
                except (AttributeError, ValueError, TypeError):
                    # AttributeError: no list of kpoints in input
                    # ValueError: labels from input do not match the output
                    #      list of kpoints (some kpoints are missing)
                    # TypeError: labels are not set, so kpoints_from_input.labels=None
                    pass

                # Get the bands occupations and correct the occupations of QE:
                # If it computes only one component, it occupies it with half number of electrons
                try:
                    bands_data['occupations'][1]
                    the_occupations = bands_data['occupations']
                except IndexError:
                    the_occupations = 2.*numpy.array(bands_data['occupations'][0])

                try:
                    bands_data['bands'][1]
                    bands_energies = bands_data['bands']
                except IndexError:
                    bands_energies = bands_data['bands'][0]

                the_bands_data = BandsData()
                the_bands_data.set_kpointsdata(kpoints_for_bands)
                the_bands_data.set_bands(bands_energies,
                                         units = bands_data['bands_units'],
                                         occupations = the_occupations)

                new_nodes_list += [('output_band', the_bands_data)]
                out_dict['linknames_band'] = ['output_band']

        # Separate the atomic_occupations dictionary in its own node if it is present
        atomic_occupations = out_dict.get('atomic_occupations', {})
        if atomic_occupations:
            out_dict.pop('atomic_occupations')
            atomic_occupations_node = ParameterData(dict=atomic_occupations)
            new_nodes_list.append(('output_atomic_occupations', atomic_occupations_node))

        output_params = ParameterData(dict=out_dict)
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        if trajectory_data:
            from aiida.orm.data.array.trajectory import TrajectoryData
            from aiida.orm.data.array import ArrayData
            try:
                positions = numpy.array( trajectory_data.pop('atomic_positions_relax'))
                try:
                    cells = numpy.array( trajectory_data.pop('lattice_vectors_relax'))
                    # if KeyError, the MD was at fixed cell
                except KeyError:
                    cells = numpy.array([in_struc.cell] * len(positions))

                symbols = numpy.array([str(i.kind_name) for i in in_struc.sites])
                stepids = numpy.arange(len(positions)) # a growing integer per step
                # I will insert time parsing when they fix their issues about time
                # printing (logic is broken if restart is on)

                traj = TrajectoryData()
                traj.set_trajectory(
                    stepids = stepids,
                    cells = cells,
                    symbols = symbols,
                    positions = positions,
                )
                for x in trajectory_data.iteritems():
                    traj.set_array(x[0],numpy.array(x[1]))
                new_nodes_list.append((self.get_linkname_outtrajectory(),traj))

            except KeyError:
                # forces, atomic charges and atomic mag. moments, in scf calculation (when outputed)
                arraydata = ArrayData()
                for x in trajectory_data.iteritems():
                    arraydata.set_array(x[0],numpy.array(x[1]))
                new_nodes_list.append((self.get_linkname_outarray(),arraydata))

        return successful, new_nodes_list
Exemple #9
0
    def parse_with_retrieved(self, retrieved):
        """
        Parses the calculation-output datafolder, and stores
        results.

        :param retrieved: a dictionary of retrieved nodes, where the keys
            are the link names of retrieved nodes, and the values are the
            nodes.
        """
        from aiida.common.exceptions import InvalidOperation
        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.orm.data.array import ArrayData
        import os
        import numpy
        import copy

        successful = True

        # check if I'm not to overwrite anything
        #state = self._calc.get_state()
        #if state != calc_states.PARSING:
        #    raise InvalidOperation("Calculation not in {} state"
        #                           .format(calc_states.PARSING) )

        # look for eventual flags of the parser
        try:
            parser_opts = self._calc.inp.settings.get_dict()[
                self.get_parser_settings_key()]
        except (AttributeError, KeyError):
            parser_opts = {}

        # load the pw input dictionary
        pw_input_dict = self._calc.inp.pw_parameters.get_dict()

        # load the pw input dictionary
        neb_input_dict = self._calc.inp.neb_parameters.get_dict()

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            successful = False
            return successful, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()
        # at least the stdout should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            self.logger.error("Standard output not found")
            successful = False
            return successful, ()

        out_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._OUTPUT_FILE_NAME)

        # First parse the Neb output
        neb_out_dict, iteration_data, raw_successful = parse_raw_output_neb(
            out_file, neb_input_dict)

        # if calculation was not considered failed already, use the new value
        successful = raw_successful if successful else successful

        # Retrieve the number of images
        try:
            num_images = neb_input_dict['num_of_images']
        except KeyError:
            try:
                num_images = neb_out_dict['num_of_images']
            except KeyError:
                self.logger.error(
                    "Impossible to understand the number of images")
                successful = False
                return successful, ()

        # Now parse the information from the single pw calculations for the different images
        image_data = {}
        positions = []
        cells = []
        for i in range(num_images):
            # look for xml and parse
            xml_file = os.path.join(out_folder.get_abs_path('.'),
                                    self._calc._PREFIX + '_{}'.format(i + 1),
                                    self._calc._PREFIX + '.save',
                                    self._calc._DATAFILE_XML_BASENAME)
            try:
                with open(xml_file, 'r') as f:
                    xml_lines = f.read()  # Note: read() and not readlines()
            except IOError:
                self.logger.error(
                    "No xml file found for image {} at {}".format(
                        i + 1, xml_file))
                successful = False
                return successful, ()
            xml_data, structure_dict, bands_data = parse_pw_xml_output(
                xml_lines)

            # convert the dictionary obtained from parsing the xml to an AiiDA StructureData
            structure_data = convert_qe2aiida_structure(structure_dict)

            # look for pw output and parse it
            pw_out_file = os.path.join(
                out_folder.get_abs_path('.'),
                self._calc._PREFIX + '_{}'.format(i + 1), 'PW.out')
            try:
                with open(pw_out_file, 'r') as f:
                    pw_out_lines = f.read()  # Note: read() and not readlines()
            except IOError:
                self.logger.error(
                    "No pw output file found for image {}".format(i + 1))
                successful = False
                return successful, ()

            pw_out_data, trajectory_data, critical_messages = parse_pw_text_output(
                pw_out_lines, xml_data, structure_dict, pw_input_dict)

            # I add in the out_data all the last elements of trajectory_data values.
            # Safe for some large arrays, that I will likely never query.
            skip_keys = [
                'forces', 'atomic_magnetic_moments', 'atomic_charges',
                'lattice_vectors_relax', 'atomic_positions_relax',
                'atomic_species_name'
            ]
            tmp_trajectory_data = copy.copy(trajectory_data)
            for x in tmp_trajectory_data.iteritems():
                if x[0] in skip_keys:
                    continue
                pw_out_data[x[0]] = x[1][-1]
                if len(x[1]) == 1:  # delete eventual keys that are not arrays
                    trajectory_data.pop(x[0])
            # As the k points are an array that is rather large, and again it's not something I'm going to parse likely
            # since it's an info mainly contained in the input file, I move it to the trajectory data
            for key in ['k_points', 'k_points_weights']:
                try:
                    trajectory_data[key] = xml_data.pop(key)
                except KeyError:
                    pass

            key = 'pw_output_image_{}'.format(i + 1)
            image_data[key] = dict(pw_out_data.items() + xml_data.items())

            positions.append([site.position for site in structure_data.sites])
            cells.append(structure_data.cell)

            # If a warning was already present in the NEB, add also PW warnings to the neb output data,
            # avoiding repetitions.
            if neb_out_dict['warnings']:
                for warning in pw_out_data['warnings']:
                    if warning not in neb_out_dict['warnings']:
                        neb_out_dict['warnings'].append(warning)

        # Symbols can be obtained simply from the last image
        symbols = [str(site.kind_name) for site in structure_data.sites]

        new_nodes_list = []

        # convert the dictionary into an AiiDA object
        output_params = ParameterData(dict=dict(neb_out_dict.items() +
                                                image_data.items()))

        # return it to the execmanager
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        # convert data on structure of images into a TrajectoryData
        traj = TrajectoryData()
        traj.set_trajectory(
            stepids=numpy.arange(1, num_images + 1),
            cells=numpy.array(cells),
            symbols=numpy.array(symbols),
            positions=numpy.array(positions),
        )

        # return it to the execmanager
        new_nodes_list.append((self.get_linkname_outtrajectory(), traj))

        if parser_opts.get('all_iterations', False):
            if iteration_data:
                from aiida.orm.data.array import ArrayData

                arraydata = ArrayData()
                for x in iteration_data.iteritems():
                    arraydata.set_array(x[0], numpy.array(x[1]))
                new_nodes_list.append(
                    (self.get_linkname_iterationarray(), arraydata))

        # Load the original and interpolated energy profile along the minimum-energy path (mep)
        try:
            mep_file = os.path.join(out_folder.get_abs_path('.'),
                                    self._calc._PREFIX + '.dat')
            mep = numpy.loadtxt(mep_file)
        except Exception:
            self.logger.warning(
                "Impossible to find the file with image energies "
                "versus reaction coordinate.")
            mep = numpy.array([[]])

        try:
            interp_mep_file = os.path.join(out_folder.get_abs_path('.'),
                                           self._calc._PREFIX + '.int')
            interp_mep = numpy.loadtxt(interp_mep_file)
        except Exception:
            self.logger.warning(
                "Impossible to find the file with the interpolation "
                "of image energies versus reaction coordinate.")
            interp_mep = numpy.array([[]])
        # Create an ArrayData with the energy profiles
        mep_arraydata = ArrayData()
        mep_arraydata.set_array('mep', mep)
        mep_arraydata.set_array('interpolated_mep', interp_mep)
        new_nodes_list.append((self.get_linkname_meparray(), mep_arraydata))

        return successful, new_nodes_list
Exemple #10
0
    def parse_with_retrieved(self, retrieved):
        """
        Parses the datafolder, stores results.
        """

        # suppose at the start that the job is successful
        successful = True

        # select the folder object
        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()

        # OUTPUT file should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            successful = False
            self.logger.error("Output file not found")
            return successful, ()

        # Get file and do the parsing
        outfile = out_folder.get_abs_path(self._calc._OUTPUT_FILE_NAME)
        ouput_trajectory = out_folder.get_abs_path(
            self._calc._OUTPUT_TRAJECTORY_FILE_NAME)

        timestep = self._calc.inp.parameters.dict.timestep
        positions, step_ids, cells, symbols, time = read_lammps_trajectory(
            ouput_trajectory, timestep=timestep)

        # Delete trajectory once parsed
        try:
            import os
            os.remove(ouput_trajectory)
        except:
            pass

#        force_constants = parse_FORCE_CONSTANTS(outfile)

# look at warnings
        warnings = []
        with open(out_folder.get_abs_path(self._calc._SCHED_ERROR_FILE)) as f:
            errors = f.read()
        if errors:
            warnings = [errors]

        # ====================== prepare the output node ======================

        # save the outputs
        new_nodes_list = []

        # save trajectory into node
        try:
            trajectory_data = TrajectoryData()
            trajectory_data.set_trajectory(step_ids,
                                           cells,
                                           symbols,
                                           positions,
                                           times=time)
            new_nodes_list.append(('trajectory_data', trajectory_data))
        except KeyError:  # keys not found in json
            pass

        # add the dictionary with warnings
        new_nodes_list.append((self.get_linkname_outparams(),
                               ParameterData(dict={'warnings': warnings})))

        return successful, new_nodes_list
Exemple #11
0
    def parse_with_retrieved(self, retrieved):
        """
        Receives in input a dictionary of retrieved nodes.
        Does all the logic here.
        """
        from aiida.common.exceptions import InvalidOperation
        import os
        import glob

        successful = True

        # check if I'm not to overwrite anything
        #state = self._calc.get_state()
        #if state != calc_states.PARSING:
        #    raise InvalidOperation("Calculation not in {} state"
        #                           .format(calc_states.PARSING) )

        # look for eventual flags of the parser
        try:
            parser_opts = self._calc.inp.settings.get_dict()[
                self.get_parser_settings_key()]
        except (AttributeError, KeyError):
            parser_opts = {}

        # load the input dictionary
        # TODO: pass this input_dict to the parser. It might need it.
        input_dict = self._calc.inp.parameters.get_dict()

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()
        # at least the stdout should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            self.logger.error("Standard output not found")
            successful = False
            return successful, ()
        # if there is something more, I note it down, so to call the raw parser
        # with the right options
        # look for xml
        has_xml = False
        if self._calc._DATAFILE_XML_BASENAME in list_of_files:
            has_xml = True
        # look for bands
        has_bands = False
        if glob.glob(os.path.join(out_folder.get_abs_path('.'), 'K*[0-9]')):
            # Note: assuming format of kpoints subfolder is K*[0-9]
            has_bands = True
            # TODO: maybe it can be more general than bands only?
        out_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._OUTPUT_FILE_NAME)
        xml_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._DATAFILE_XML_BASENAME)
        dir_with_bands = out_folder.get_abs_path('.')

        # call the raw parsing function
        parsing_args = [out_file, input_dict, parser_opts]
        if has_xml:
            parsing_args.append(xml_file)
        if has_bands:
            if not has_xml:
                self.logger.warning("Cannot parse bands if xml file not "
                                    "found")
            else:
                parsing_args.append(dir_with_bands)

        out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(
            *parsing_args)

        # if calculation was not considered failed already, use the new value
        successful = raw_successful if successful else successful

        # The symmetry info has large arrays, that occupy most of the database.
        # turns out most of this is due to 64 matrices that are repeated over and over again.
        # therefore I map part of the results in a list of dictionaries wrote here once and for all
        # if the parser_opts has a key all_symmetries set to True, I don't reduce it
        all_symmetries = parser_opts.get('all_symmetries', False)
        if not all_symmetries:
            try:
                if 'symmetries' in out_dict.keys():
                    old_symmetries = out_dict['symmetries']
                    new_symmetries = []
                    for this_sym in old_symmetries:
                        name = this_sym['name']
                        index = None
                        for i, this in enumerate(self._possible_symmetries):
                            if name in this['name']:
                                index = i
                        if index is None:
                            self.logger.error(
                                "Symmetry {} not found".format(name))
                        new_dict = {}
                        # note: here I lose the information about equivalent
                        # ions and fractional_translation.
                        # They will be present with all_symmetries=True
                        new_dict['t_rev'] = this_sym['t_rev']
                        new_dict['symmetry_number'] = index
                        new_symmetries.append(new_dict)
                    out_dict[
                        'symmetries'] = new_symmetries  # and overwrite the old one
            except KeyError:  # no symmetries were parsed (failed case, likely)
                self.logger.error("No symmetries were found in output")

        new_nodes_list = []

        # I eventually save the new structure. structure_data is unnecessary after this
        in_struc = self._calc.get_inputs_dict()['structure']
        type_calc = input_dict['CONTROL']['calculation']
        struc = in_struc
        if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
            if 'cell' in structure_data.keys():
                struc = convert_qe2aiida_structure(structure_data,
                                                   input_structure=in_struc)
                new_nodes_list.append(
                    (self.get_linkname_outstructure(), struc))

        k_points_list = trajectory_data.pop('k_points', None)
        k_points_weights_list = trajectory_data.pop('k_points_weights', None)

        if k_points_list is not None:

            # build the kpoints object
            if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
                raise QEOutputParsingError(
                    'Error in kpoints units (should be cartesian)')
            # converting bands into a BandsData object (including the kpoints)

            kpoints_from_output = KpointsData()
            kpoints_from_output.set_cell_from_structure(struc)
            kpoints_from_output.set_kpoints(k_points_list,
                                            cartesian=True,
                                            weights=k_points_weights_list)
            kpoints_from_input = self._calc.inp.kpoints

            if not bands_data:
                try:
                    kpoints_from_input.get_kpoints()
                except AttributeError:
                    new_nodes_list += [(self.get_linkname_out_kpoints(),
                                        kpoints_from_output)]

            if bands_data:
                import numpy
                # converting bands into a BandsData object (including the kpoints)

                kpoints_for_bands = kpoints_from_output

                try:
                    kpoints_from_input.get_kpoints()
                    kpoints_for_bands.labels = kpoints_from_input.labels
                except (AttributeError, ValueError, TypeError):
                    # AttributeError: no list of kpoints in input
                    # ValueError: labels from input do not match the output
                    #      list of kpoints (some kpoints are missing)
                    # TypeError: labels are not set, so kpoints_from_input.labels=None
                    pass

                # get the bands occupations.
                # correct the occupations of QE: if it computes only one component,
                # it occupies it with half number of electrons
                try:
                    bands_data['occupations'][1]
                    the_occupations = bands_data['occupations']
                except IndexError:
                    the_occupations = 2. * numpy.array(
                        bands_data['occupations'][0])

                try:
                    bands_data['bands'][1]
                    bands_energies = bands_data['bands']
                except IndexError:
                    bands_energies = bands_data['bands'][0]

                the_bands_data = BandsData()
                the_bands_data.set_kpointsdata(kpoints_for_bands)
                the_bands_data.set_bands(bands_energies,
                                         units=bands_data['bands_units'],
                                         occupations=the_occupations)

                new_nodes_list += [('output_band', the_bands_data)]
                out_dict['linknames_band'] = ['output_band']

        # convert the dictionary into an AiiDA object
        output_params = ParameterData(dict=out_dict)
        # return it to the execmanager
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        if trajectory_data:
            import numpy
            from aiida.orm.data.array.trajectory import TrajectoryData
            from aiida.orm.data.array import ArrayData
            try:
                positions = numpy.array(
                    trajectory_data.pop('atomic_positions_relax'))
                try:
                    cells = numpy.array(
                        trajectory_data.pop('lattice_vectors_relax'))
                    # if KeyError, the MD was at fixed cell
                except KeyError:
                    cells = numpy.array([in_struc.cell] * len(positions))

                symbols = numpy.array(
                    [str(i.kind_name) for i in in_struc.sites])
                stepids = numpy.arange(
                    len(positions))  # a growing integer per step
                # I will insert time parsing when they fix their issues about time
                # printing (logic is broken if restart is on)

                traj = TrajectoryData()
                traj.set_trajectory(
                    stepids=stepids,
                    cells=cells,
                    symbols=symbols,
                    positions=positions,
                )
                for x in trajectory_data.iteritems():
                    traj.set_array(x[0], numpy.array(x[1]))
                # return it to the execmanager
                new_nodes_list.append(
                    (self.get_linkname_outtrajectory(), traj))

            except KeyError:
                # forces, atomic charges and atomic mag. moments, in scf
                # calculation (when outputed)
                arraydata = ArrayData()
                for x in trajectory_data.iteritems():
                    arraydata.set_array(x[0], numpy.array(x[1]))
                # return it to the execmanager
                new_nodes_list.append(
                    (self.get_linkname_outarray(), arraydata))

        return successful, new_nodes_list
Exemple #12
0
    def parse_with_retrieved(self, retrieved):
        """
        Receives in input a dictionary of retrieved nodes.
        Does all the logic here.
        """
        from aiida.common.exceptions import InvalidOperation
        import os, numpy
        from distutils.version import LooseVersion
        
        successful = True

        # check if I'm not to overwrite anything
        state = self._calc.get_state()
        if state != calc_states.PARSING:
            raise InvalidOperation("Calculation not in {} state"
                                   .format(calc_states.PARSING))

        # get the input structure
        input_structure = self._calc.inp.structure

        # load the input dictionary
        # TODO: pass this input_dict to the parser. It might need it.            
        input_dict = self._calc.inp.parameters.get_dict()

        # Check that the retrieved folder is there 
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()
        # at least the stdout should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            successful = False
            new_nodes_tuple = ()
            self.logger.error("Standard output not found")
            return successful, new_nodes_tuple

        # if there is something more, I note it down, so to call the raw parser
        # with the right options
        # look for xml
        out_file = out_folder.get_abs_path(self._calc._OUTPUT_FILE_NAME)

        xml_file = None
        if self._calc._DATAFILE_XML_BASENAME in list_of_files:
            xml_file = out_folder.get_abs_path(self._calc._DATAFILE_XML_BASENAME)

        xml_counter_file = None
        if self._calc._FILE_XML_PRINT_COUNTER in list_of_files:
            xml_counter_file = out_folder.get_abs_path(
                self._calc._FILE_XML_PRINT_COUNTER)

        parsing_args = [out_file, xml_file, xml_counter_file]

        # call the raw parsing function
        out_dict, raw_successful = parse_cp_raw_output(*parsing_args)

        successful = True if raw_successful else False

        # parse the trajectory. Units in Angstrom, picoseconds and eV.
        # append everthing in the temporary dictionary raw_trajectory
        expected_configs = None
        raw_trajectory = {}
        evp_keys = ['electronic_kinetic_energy', 'cell_temperature', 'ionic_temperature',
                    'scf_total_energy', 'enthalpy', 'enthalpy_plus_kinetic',
                    'energy_constant_motion', 'volume', 'pressure']
        pos_vel_keys = ['cells', 'positions', 'times', 'velocities']
        # set a default null values

        # Now prepare the reordering, as filex in the xml are  ordered
        reordering = self._generate_sites_ordering(out_dict['species'],
                                                   out_dict['atoms'])

        # =============== POSITIONS trajectory ============================
        try:
            with open(out_folder.get_abs_path(
                    '{}.pos'.format(self._calc._PREFIX))) as posfile:
                pos_data = [l.split() for l in posfile]
                # POSITIONS stored in angstrom
            traj_data = parse_cp_traj_stanzas(num_elements=out_dict['number_of_atoms'],
                                              splitlines=pos_data,
                                              prepend_name='positions_traj',
                                              rescale=bohr_to_ang)

            # here initialize the dictionary. If the parsing of positions fails, though, I don't have anything
            # out of the CP dynamics. Therefore, the calculation status is set to FAILED.
            raw_trajectory['positions_ordered'] = self._get_reordered_array(traj_data['positions_traj_data'],
                                                                            reordering)
            raw_trajectory['times'] = numpy.array(traj_data['positions_traj_times'])
        except IOError:
            out_dict['warnings'].append("Unable to open the POS file... skipping.")
            successful = False
        except Exception as e:
            out_dict['warnings'].append("Error parsing POS file ({}). Skipping file."
                                        .format(e.message))
            successful = False

        # =============== CELL trajectory ============================
        try:
            with open(os.path.join(out_folder.get_abs_path('.'),
                                   '{}.cel'.format(self._calc._PREFIX))) as celfile:
                cel_data = [l.split() for l in celfile]
            traj_data = parse_cp_traj_stanzas(num_elements=3,
                                              splitlines=cel_data,
                                              prepend_name='cell_traj',
                                              rescale=bohr_to_ang)
            raw_trajectory['cells'] = numpy.array(traj_data['cell_traj_data'])
        except IOError:
            out_dict['warnings'].append("Unable to open the CEL file... skipping.")
        except Exception as e:
            out_dict['warnings'].append("Error parsing CEL file ({}). Skipping file."
                                        .format(e.message))

        # =============== VELOCITIES trajectory ============================
        try:
            with open(os.path.join(out_folder.get_abs_path('.'),
                                   '{}.vel'.format(self._calc._PREFIX))) as velfile:
                vel_data = [l.split() for l in velfile]
            traj_data = parse_cp_traj_stanzas(num_elements=out_dict['number_of_atoms'],
                                              splitlines=vel_data,
                                              prepend_name='velocities_traj',
                                              rescale=bohr_to_ang / timeau_to_sec * 10 ** 12)  # velocities in ang/ps,
            raw_trajectory['velocities_ordered'] = self._get_reordered_array(traj_data['velocities_traj_data'],
                                                                             reordering)
        except IOError:
            out_dict['warnings'].append("Unable to open the VEL file... skipping.")
        except Exception as e:
            out_dict['warnings'].append("Error parsing VEL file ({}). Skipping file."
                                        .format(e.message))

        # =============== EVP trajectory ============================
        try:
            matrix = numpy.genfromtxt(os.path.join(out_folder.get_abs_path('.'),
                                                   '{}.evp'.format(self._calc._PREFIX)))
            # there might be a different format if the matrix has one row only
            try:
                matrix.shape[1]
            except IndexError:
                matrix = numpy.array(numpy.matrix(matrix))
            
            if LooseVersion(out_dict['creator_version']) > LooseVersion("5.1"):
                # Between version 5.1 and 5.1.1, someone decided to change
                # the .evp output format, without any way to know that this 
                # happened... SVN commit 11158.
                # I here use the version number to parse, plus some
                # heuristics to check that I'm doing the right thing
                #print "New version"
                raw_trajectory['steps'] = numpy.array(matrix[:,0],dtype=int)
                raw_trajectory['evp_times']                 = matrix[:,1]                    # TPS, ps
                raw_trajectory['electronic_kinetic_energy'] = matrix[:,2] * hartree_to_ev    # EKINC, eV
                raw_trajectory['cell_temperature']          = matrix[:,3]                    # TEMPH, K
                raw_trajectory['ionic_temperature']         = matrix[:,4]                    # TEMPP, K
                raw_trajectory['scf_total_energy']          = matrix[:,5] * hartree_to_ev    # ETOT, eV
                raw_trajectory['enthalpy']                  = matrix[:,6] * hartree_to_ev    # ENTHAL, eV
                raw_trajectory['enthalpy_plus_kinetic']     = matrix[:,7] * hartree_to_ev    # ECONS, eV
                raw_trajectory['energy_constant_motion']    = matrix[:,8] * hartree_to_ev    # ECONT, eV
                raw_trajectory['volume']                    = matrix[:,9] * (bohr_to_ang**3) # volume, angstrom^3
                raw_trajectory['pressure']                  = matrix[:,10]                    # out_press, GPa
            else:
                #print "Old version"    
                raw_trajectory['steps'] = numpy.array(matrix[:,0],dtype=int)
                raw_trajectory['electronic_kinetic_energy'] = matrix[:,1] * hartree_to_ev    # EKINC, eV
                raw_trajectory['cell_temperature']          = matrix[:,2]                    # TEMPH, K
                raw_trajectory['ionic_temperature']         = matrix[:,3]                    # TEMPP, K
                raw_trajectory['scf_total_energy']          = matrix[:,4] * hartree_to_ev    # ETOT, eV
                raw_trajectory['enthalpy']                  = matrix[:,5] * hartree_to_ev    # ENTHAL, eV
                raw_trajectory['enthalpy_plus_kinetic']     = matrix[:,6] * hartree_to_ev    # ECONS, eV
                raw_trajectory['energy_constant_motion']    = matrix[:,7] * hartree_to_ev    # ECONT, eV
                raw_trajectory['volume']                    = matrix[:,8] * (bohr_to_ang**3) # volume, angstrom^3
                raw_trajectory['pressure']                  = matrix[:,9]                    # out_press, GPa
                raw_trajectory['evp_times']                  = matrix[:,10]                    # TPS, ps

            # Huristics to understand if it's correct.
            # A better heuristics could also try to fix possible issues
            # (in new versions of QE, it's possible to recompile it with
            # the __OLD_FORMAT flag to get back the old version format...)
            # but I won't do it, as there may be also other columns swapped.
            # Better to stop and ask the user to check what's going on.
            max_time_difference = abs(
                numpy.array(raw_trajectory['times']) - 
                numpy.array(raw_trajectory['evp_times'])).max()
            if max_time_difference > 1.e-4: # It is typically ~1.e-7 due to roundoff errors
                # If there is a large discrepancy, I set successful = False,
                # it means there is something very weird going on...
                out_dict['warnings'].append("Error parsing EVP file ({}). Skipping file."
                                            .format(e.message))
                successful = False

                # In this case, remove all what has been parsed to avoid users
                # using the wrong data
                for k in evp_keys:
                    try:
                        del raw_trajectory[k]
                    except KeyError:
                        # If for some reason a key is not there, ignore
                        pass
            # Delete evp_times in any case, it's a duplicate of 'times'
            del raw_trajectory['evp_times']
            
        except Exception as e:
            out_dict['warnings'].append("Error parsing EVP file ({}). Skipping file.".format(e.message))
        except IOError:
            out_dict['warnings'].append("Unable to open the EVP file... skipping.")

        # get the symbols from the input        
        # TODO: I should have kinds in TrajectoryData
        raw_trajectory['symbols'] = numpy.array([str(i.kind_name) for i in input_structure.sites])

        traj = TrajectoryData()
        traj.set_trajectory(stepids=raw_trajectory['steps'],
                            cells=raw_trajectory['cells'],
                            symbols=raw_trajectory['symbols'],
                            positions=raw_trajectory['positions_ordered'],
                            times=raw_trajectory['times'],
                            velocities=raw_trajectory['velocities_ordered'],
        )

        for this_name in evp_keys:
            try:
                traj.set_array(this_name,raw_trajectory[this_name])
            except KeyError:
                # Some columns may have not been parsed, skip
                pass
        new_nodes_list = [(self.get_linkname_trajectory(),traj)]
        
        # Remove big dictionaries that would be redundant
        # For atoms and cell, there is a small possibility that nothing is parsed
        # but then probably nothing moved.
        try:
            del out_dict['atoms']
        except KeyError:
            pass
        try:
            del out_dict['cell']
        except KeyError:
            pass
        try:
            del out_dict['ions_positions_stau']
        except KeyError:
            pass
        try:
            del out_dict['ions_positions_svel']
        except KeyError:
            pass
        try:
            del out_dict['ions_positions_taui']
        except KeyError:
            pass
        # This should not be needed
        try:
            del out_dict['atoms_index_list']
        except KeyError:
            pass
        # This should be already in the input
        try:
            del out_dict['atoms_if_pos_list']
        except KeyError:
            pass
        # 
        try:
            del out_dict['ions_positions_force']
        except KeyError:
            pass

        # convert the dictionary into an AiiDA object
        output_params = ParameterData(dict=out_dict)
        # save it into db
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        return successful, new_nodes_list
Exemple #13
0
    def create_trajectory_data():
        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.orm.group import Group
        import numpy

        # Create a node with two arrays
        n = TrajectoryData()

        # I create sample data
        stepids = numpy.array([60, 70])
        times = stepids * 0.01
        cells = numpy.array([[[
            2.,
            0.,
            0.,
        ], [
            0.,
            2.,
            0.,
        ], [
            0.,
            0.,
            2.,
        ]], [[
            3.,
            0.,
            0.,
        ], [
            0.,
            3.,
            0.,
        ], [
            0.,
            0.,
            3.,
        ]]])
        symbols = numpy.array(['H', 'O', 'C'])
        positions = numpy.array([[[0., 0., 0.], [0.5, 0.5, 0.5],
                                  [1.5, 1.5, 1.5]],
                                 [[0., 0., 0.], [0.5, 0.5, 0.5],
                                  [1.5, 1.5, 1.5]]])
        velocities = numpy.array([[[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]],
                                  [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5],
                                   [-0.5, -0.5, -0.5]]])

        # I set the node
        n.set_trajectory(stepids=stepids,
                         cells=cells,
                         symbols=symbols,
                         positions=positions,
                         times=times,
                         velocities=velocities)

        n.store()

        # Create 2 groups and add the data to one of them
        g_ne = Group(name='non_empty_group')
        g_ne.store()
        g_ne.add_nodes(n)

        g_e = Group(name='empty_group')
        g_e.store()

        return {
            TestVerdiDataListable.NODE_ID_STR: n.id,
            TestVerdiDataListable.NON_EMPTY_GROUP_ID_STR: g_ne.id,
            TestVerdiDataListable.EMPTY_GROUP_ID_STR: g_e.id
        }
Exemple #14
0
    def parse_with_retrieved(self, retrieved):
        """
        Parse the output nodes for a PwCalculations from a dictionary of retrieved nodes.
        Two nodes that are expected are the default 'retrieved' FolderData node which will
        store the retrieved files permanently in the repository. The second required node
        is the unstored FolderData node with the temporary retrieved files, which should
        be passed under the key 'retrieved_temporary_folder_key' of the Parser class.

        :param retrieved: a dictionary of retrieved nodes
        """
        import os

        successful = True

        # Load the input dictionary
        parameters = self._calc.inp.parameters.get_dict()

        # Look for optional settings input node and potential 'parser_options' dictionary within it
        try:
            settings = self._calc.inp.settings.get_dict()
            parser_opts = settings[self.get_parser_settings_key()]
        except (AttributeError, KeyError):
            settings = {}
            parser_opts = {}

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # Verify that the retrieved_temporary_folder is within the arguments if temporary files were specified
        if self._calc._get_retrieve_temporary_list():
            try:
                temporary_folder = retrieved[
                    self.retrieved_temporary_folder_key]
                dir_with_bands = temporary_folder.get_abs_path('.')
            except KeyError:
                self.logger.error(
                    'the {} was not passed as an argument'.format(
                        self.retrieved_temporary_folder_key))
                return False, ()
        else:
            dir_with_bands = None

        list_of_files = out_folder.get_folder_list()

        # The stdout is required for parsing
        if self._calc._OUTPUT_FILE_NAME not in list_of_files:
            self.logger.error(
                "The standard output file '{}' was not found but is required".
                format(self._calc._OUTPUT_FILE_NAME))
            return False, ()

        # The xml file is required for parsing
        if self._calc._DATAFILE_XML_BASENAME not in list_of_files:
            self.logger.error(
                "The xml output file '{}' was not found but is required".
                format(self._calc._DATAFILE_XML_BASENAME))
            successful = False
            xml_file = None
        else:
            xml_file = os.path.join(out_folder.get_abs_path('.'),
                                    self._calc._DATAFILE_XML_BASENAME)

        out_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._OUTPUT_FILE_NAME)

        # Call the raw parsing function
        parsing_args = [
            out_file, parameters, parser_opts, xml_file, dir_with_bands
        ]
        out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(
            *parsing_args)

        # If calculation was not considered failed already, use the new value
        successful = raw_successful if successful else successful

        # If the parser option 'all_symmetries' is not set to True, we reduce the raw parsed symmetries to safe space
        all_symmetries = parser_opts.get('all_symmetries', False)

        if not all_symmetries:

            # In the standard output, each symmetry operation print two rotation matrices:
            #
            # * S_cryst^T: matrix in crystal coordinates, transposed
            # * S_cart: matrix in cartesian coordinates,
            #
            # The XML files only print one matrix:
            #
            # * S_cryst: matrix in crystal coordinates
            #
            # The raw parsed symmetry information from the XML is large and will load the database heavily if stored as
            # is for each calculation. Instead, we will map these dictionaries onto a static dictionary of rotation
            # matrices generated by the _get_qe_symmetry_list static method. This dictionary will return the rotation
            # matrices in cartesian coordinates, i.e. S_cart. In order to compare the raw matrices from the XML to these
            # static matrices we have to convert S_cryst into S_cart. We derive here how that is done:
            #
            #   S_cryst * v_cryst = v_cryst'
            #
            # where v_cryst' is the rotated vector v_cryst under S_cryst
            # We define `cell` where cell vectors are rows. Converting a vector from crystal to cartesian
            # coordinates is defined as:
            #
            #   cell^T * v_cryst = v_cart
            #
            # The inverse of this operation is defined as
            #
            #   v_cryst = cell^Tinv * v_cart
            #
            # Replacing the last equation into the first we find:
            #
            #   S_cryst * cell^Tinv * v_cart = cell^Tinv * v_cart'
            #
            # Multiply on the left with cell^T gives:
            #
            #   cell^T * S_cryst * cell^Tinv * v_cart = v_cart'
            #
            # which can be rewritten as:
            #
            #   S_cart * v_cart = v_cart'
            #
            # where:
            #
            #   S_cart = cell^T * S_cryst * cell^Tinv
            #
            # We compute here the transpose and its inverse of the structure cell basis, which is needed to transform
            # the parsed rotation matrices, which are in crystal coordinates, to cartesian coordinates, which are the
            # matrices that are returned by the _get_qe_symmetry_list staticmethod
            cell = structure_data['cell']['lattice_vectors']
            cell_T = numpy.transpose(cell)
            cell_Tinv = numpy.linalg.inv(cell_T)

            try:
                if 'symmetries' in out_dict.keys():
                    old_symmetries = out_dict['symmetries']
                    new_symmetries = []
                    for this_sym in old_symmetries:
                        name = this_sym['name'].strip()
                        for i, this in enumerate(self._possible_symmetries):
                            # Since we do an exact comparison we strip the string name from whitespace
                            # and as soon as it is matched, we break to prevent it from matching another
                            if name == this['name'].strip():
                                index = i
                                break
                        else:
                            index = None
                            self.logger.error(
                                'Symmetry {} not found'.format(name))

                        new_dict = {}
                        if index is not None:
                            # The raw parsed rotation matrix is in crystal coordinates, whereas the mapped rotation
                            # in self._possible_symmetries is in cartesian coordinates. To allow them to be compared
                            # to make sure we matched the correct rotation symmetry, we first convert the parsed matrix
                            # to cartesian coordinates. For explanation of the method, see comment above.
                            rotation_cryst = this_sym['rotation']
                            rotation_cart_new = self._possible_symmetries[
                                index]['matrix']
                            rotation_cart_old = numpy.dot(
                                cell_T, numpy.dot(rotation_cryst, cell_Tinv))

                            inversion = self._possible_symmetries[index][
                                'inversion']
                            if not are_matrices_equal(
                                    rotation_cart_old,
                                    rotation_cart_new,
                                    swap_sign_matrix_b=inversion):
                                self.logger.error(
                                    'Mapped rotation matrix {} does not match the original rotation {}'
                                    .format(rotation_cart_new,
                                            rotation_cart_old))
                                new_dict['all_symmetries'] = this_sym
                            else:
                                # Note: here I lose the information about equivalent ions and fractional_translation.
                                new_dict['t_rev'] = this_sym['t_rev']
                                new_dict['symmetry_number'] = index
                        else:
                            new_dict['all_symmetries'] = this_sym

                        new_symmetries.append(new_dict)

                    out_dict[
                        'symmetries'] = new_symmetries  # and overwrite the old one
            except KeyError:  # no symmetries were parsed (failed case, likely)
                self.logger.error("No symmetries were found in output")

        new_nodes_list = []

        # I eventually save the new structure. structure_data is unnecessary after this
        in_struc = self._calc.get_inputs_dict()['structure']
        type_calc = parameters['CONTROL']['calculation']
        struc = in_struc
        if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
            if 'cell' in structure_data.keys():
                struc = convert_qe2aiida_structure(structure_data,
                                                   input_structure=in_struc)
                new_nodes_list.append(
                    (self.get_linkname_outstructure(), struc))

        k_points_list = trajectory_data.pop('k_points', None)
        k_points_weights_list = trajectory_data.pop('k_points_weights', None)

        if k_points_list is not None:

            # Build the kpoints object
            if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
                raise QEOutputParsingError(
                    'Error in kpoints units (should be cartesian)')

            kpoints_from_output = KpointsData()
            kpoints_from_output.set_cell_from_structure(struc)
            kpoints_from_output.set_kpoints(k_points_list,
                                            cartesian=True,
                                            weights=k_points_weights_list)
            kpoints_from_input = self._calc.inp.kpoints

            if not bands_data:
                try:
                    kpoints_from_input.get_kpoints()
                except AttributeError:
                    new_nodes_list += [(self.get_linkname_out_kpoints(),
                                        kpoints_from_output)]

            # Converting bands into a BandsData object (including the kpoints)
            if bands_data:
                kpoints_for_bands = kpoints_from_output

                try:
                    kpoints_from_input.get_kpoints()
                    kpoints_for_bands.labels = kpoints_from_input.labels
                except (AttributeError, ValueError, TypeError):
                    # AttributeError: no list of kpoints in input
                    # ValueError: labels from input do not match the output
                    #      list of kpoints (some kpoints are missing)
                    # TypeError: labels are not set, so kpoints_from_input.labels=None
                    pass

                # Get the bands occupations and correct the occupations of QE:
                # If it computes only one component, it occupies it with half number of electrons
                try:
                    bands_data['occupations'][1]
                    the_occupations = bands_data['occupations']
                except IndexError:
                    the_occupations = 2. * numpy.array(
                        bands_data['occupations'][0])

                try:
                    bands_data['bands'][1]
                    bands_energies = bands_data['bands']
                except IndexError:
                    bands_energies = bands_data['bands'][0]

                the_bands_data = BandsData()
                the_bands_data.set_kpointsdata(kpoints_for_bands)
                the_bands_data.set_bands(bands_energies,
                                         units=bands_data['bands_units'],
                                         occupations=the_occupations)

                new_nodes_list += [('output_band', the_bands_data)]
                out_dict['linknames_band'] = ['output_band']

        # Separate the atomic_occupations dictionary in its own node if it is present
        atomic_occupations = out_dict.get('atomic_occupations', {})
        if atomic_occupations:
            out_dict.pop('atomic_occupations')
            atomic_occupations_node = ParameterData(dict=atomic_occupations)
            new_nodes_list.append(
                ('output_atomic_occupations', atomic_occupations_node))

        output_params = ParameterData(dict=out_dict)
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        if trajectory_data:
            from aiida.orm.data.array.trajectory import TrajectoryData
            from aiida.orm.data.array import ArrayData
            try:
                positions = numpy.array(
                    trajectory_data.pop('atomic_positions_relax'))
                try:
                    cells = numpy.array(
                        trajectory_data.pop('lattice_vectors_relax'))
                    # if KeyError, the MD was at fixed cell
                except KeyError:
                    cells = numpy.array([in_struc.cell] * len(positions))

                symbols = numpy.array(
                    [str(i.kind_name) for i in in_struc.sites])
                stepids = numpy.arange(
                    len(positions))  # a growing integer per step
                # I will insert time parsing when they fix their issues about time
                # printing (logic is broken if restart is on)

                traj = TrajectoryData()
                traj.set_trajectory(
                    stepids=stepids,
                    cells=cells,
                    symbols=symbols,
                    positions=positions,
                )
                for x in trajectory_data.iteritems():
                    traj.set_array(x[0], numpy.array(x[1]))
                new_nodes_list.append(
                    (self.get_linkname_outtrajectory(), traj))

            except KeyError:
                # forces, atomic charges and atomic mag. moments, in scf calculation (when outputed)
                arraydata = ArrayData()
                for x in trajectory_data.iteritems():
                    arraydata.set_array(x[0], numpy.array(x[1]))
                new_nodes_list.append(
                    (self.get_linkname_outarray(), arraydata))

        return successful, new_nodes_list