Esempio n. 1
0
    def _get_output_nodes(self, output_path, messages_path, xml_path,
                          json_path, bands_path):
        """
        Extracts output nodes from the standard output and standard error
        files. (And XML and JSON files)
        """
        from aiida.orm.data.array.trajectory import TrajectoryData
        import re

        parser_version = 'aiida-0.11.0--plugin-0.11.5'
        parser_info = {}
        parser_info['parser_info'] = 'AiiDA Siesta Parser V. {}'.format(
            parser_version)
        parser_info['parser_warnings'] = []

        result_list = []

        if xml_path is None:
            self.logger.error("Could not find a CML file to parse")
            # NOTE aiida.xml is not there?
            raise SiestaOutputParsingError(
                "Could not find a CML file to parse")

        # We get everything from the CML file

        xmldoc = get_parsed_xml_doc(xml_path)
        if xmldoc is None:
            self.logger.error("Malformed CML file: cannot parse")
            raise SiestaCMLParsingError("Malformed CML file: cannot parse")

        # These are examples of how we can access input items
        #
        # Structure (mandatory)
        #
        in_struc = self._calc.get_inputs_dict()['structure']
        #
        # Settings (optional)
        #
        try:
            in_settings = self._calc.get_inputs_dict()['settings']
        except KeyError:
            in_settings = None

        result_dict = get_dict_from_xml_doc(xmldoc)

        # Add timing information

        if json_path is None:
            self.logger.info("Could not find a time.json file to parse")
        else:
            from json_time import get_timing_info
            global_time, timing_decomp = get_timing_info(json_path)
            if global_time is None:
                self.logger.info("Cannot fully parse the time.json file")
            else:
                result_dict["global_time"] = global_time
                result_dict["timing_decomposition"] = timing_decomp

        # Add warnings
        successful = True
        if messages_path is None:
            # Perhaps using an old version of Siesta
            warnings_list = ['WARNING: No MESSAGES file...']
        else:
            successful, warnings_list = self.get_warnings_from_file(
                messages_path)

        result_dict["warnings"] = warnings_list

        # Add parser info dictionary
        parsed_dict = dict(result_dict.items() + parser_info.items())

        output_data = ParameterData(dict=parsed_dict)

        link_name = self.get_linkname_outparams()
        result_list.append((link_name, output_data))

        # If the structure has changed, save it
        if is_variable_geometry(xmldoc):
            # Get the input structure to copy its site names,
            # as the CML file traditionally contained only the
            # atomic symbols.
            #
            struc = get_last_structure(xmldoc, in_struc)
            result_list.append((self.get_linkname_outstructure(), struc))

        # Save forces and stress in an ArrayData object
        forces, stress = get_final_forces_and_stress(xmldoc)

        if forces is not None and stress is not None:
            from aiida.orm.data.array import ArrayData
            arraydata = ArrayData()
            arraydata.set_array('forces', np.array(forces))
            arraydata.set_array('stress', np.array(stress))
            result_list.append((self.get_linkname_outarray(), arraydata))

        # Parse band-structure information if available
        if bands_path is not None:
            bands, coords = self.get_bands(bands_path)
            from aiida.orm.data.array.bands import BandsData
            arraybands = BandsData()
            arraybands.set_kpoints(
                self._calc.inp.bandskpoints.get_kpoints(cartesian=True))
            arraybands.set_bands(bands, units="eV")
            result_list.append((self.get_linkname_bandsarray(), arraybands))
            bandsparameters = ParameterData(dict={"kp_coordinates": coords})
            result_list.append(
                (self.get_linkname_bandsparameters(), bandsparameters))

        return successful, result_list
Esempio n. 2
0
    def parse_with_retrieved(self, retrieved):
        """
        Parse the output nodes for a PwCalculations from a dictionary of retrieved nodes.
        Two nodes that are expected are the default 'retrieved' FolderData node which will
        store the retrieved files permanently in the repository. The second required node
        is the unstored FolderData node with the temporary retrieved files, which should
        be passed under the key 'retrieved_temporary_folder_key' of the Parser class.

        :param retrieved: a dictionary of retrieved nodes
        """
        import os
        import numpy

        successful = True

        # Load the input dictionary
        parameters = self._calc.inp.parameters.get_dict()

        # Look for optional settings input node and potential 'parser_options' dictionary within it
        try:
            settings = self._calc.inp.settings.get_dict()
            parser_opts = settings[self.get_parser_settings_key()]
        except (AttributeError, KeyError):
            settings = {}
            parser_opts = {}

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # Verify that the retrieved_temporary_folder is within the arguments if temporary files were specified
        if self._calc._get_retrieve_temporary_list():
            try:
                temporary_folder = retrieved[self.retrieved_temporary_folder_key]
                dir_with_bands = temporary_folder.get_abs_path('.')
            except KeyError:
                self.logger.error('the {} was not passed as an argument'.format(self.retrieved_temporary_folder_key))
                return False, ()
        else:
            dir_with_bands = None

        list_of_files = out_folder.get_folder_list()

        # The stdout is required for parsing
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            self.logger.error("The standard output file '{}' was not found but is required".format(self._calc._OUTPUT_FILE_NAME))
            return False, ()

        # The xml file is required for parsing
        if not self._calc._DATAFILE_XML_BASENAME in list_of_files:
            self.logger.error("The xml output file '{}' was not found but is required".format(self._calc._DATAFILE_XML_BASENAME))
            successful = False
            xml_file = None
        else:
            xml_file = os.path.join(out_folder.get_abs_path('.'), self._calc._DATAFILE_XML_BASENAME)

        out_file = os.path.join(out_folder.get_abs_path('.'), self._calc._OUTPUT_FILE_NAME)

        # Call the raw parsing function
        parsing_args = [out_file, parameters, parser_opts, xml_file, dir_with_bands]
        out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(*parsing_args)

        # If calculation was not considered failed already, use the new value
        successful = raw_successful if successful else successful

        # The symmetry info has large arrays, that occupy most of the database.
        # turns out most of this is due to 64 matrices that are repeated over and over again.
        # therefore I map part of the results in a list of dictionaries wrote here once and for all
        # if the parser_opts has a key all_symmetries set to True, I don't reduce it
        all_symmetries = parser_opts.get('all_symmetries', False)
        if not all_symmetries:
            try:
                if 'symmetries' in out_dict.keys():
                    old_symmetries = out_dict['symmetries']
                    new_symmetries = []
                    for this_sym in old_symmetries:
                        name = this_sym['name']
                        index = None
                        for i,this in enumerate(self._possible_symmetries):
                            if name in this['name']:
                                index = i
                        if index is None:
                            self.logger.error("Symmetry {} not found".format(name))
                        new_dict = {}
                        # note: here I lose the information about equivalent
                        # ions and fractional_translation.
                        # They will be present with all_symmetries=True
                        new_dict['t_rev'] = this_sym['t_rev']
                        new_dict['symmetry_number'] = index
                        new_symmetries.append(new_dict)
                    out_dict['symmetries'] = new_symmetries # and overwrite the old one
            except KeyError: # no symmetries were parsed (failed case, likely)
                self.logger.error("No symmetries were found in output")

        new_nodes_list = []

        # I eventually save the new structure. structure_data is unnecessary after this
        in_struc = self._calc.get_inputs_dict()['structure']
        type_calc = parameters['CONTROL']['calculation']
        struc = in_struc
        if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
            if 'cell' in structure_data.keys():
                struc = convert_qe2aiida_structure(structure_data, input_structure=in_struc)
                new_nodes_list.append((self.get_linkname_outstructure(), struc))

        k_points_list = trajectory_data.pop('k_points', None)
        k_points_weights_list = trajectory_data.pop('k_points_weights', None)

        if k_points_list is not None:

            # Build the kpoints object
            if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
                raise QEOutputParsingError('Error in kpoints units (should be cartesian)')

            kpoints_from_output = KpointsData()
            kpoints_from_output.set_cell_from_structure(struc)
            kpoints_from_output.set_kpoints(k_points_list, cartesian=True, weights=k_points_weights_list)
            kpoints_from_input = self._calc.inp.kpoints

            if not bands_data:
                try:
                    kpoints_from_input.get_kpoints()
                except AttributeError:
                    new_nodes_list += [(self.get_linkname_out_kpoints(), kpoints_from_output)]

            # Converting bands into a BandsData object (including the kpoints)
            if bands_data:
                kpoints_for_bands = kpoints_from_output

                try:
                    kpoints_from_input.get_kpoints()
                    kpoints_for_bands.labels = kpoints_from_input.labels
                except (AttributeError, ValueError, TypeError):
                    # AttributeError: no list of kpoints in input
                    # ValueError: labels from input do not match the output
                    #      list of kpoints (some kpoints are missing)
                    # TypeError: labels are not set, so kpoints_from_input.labels=None
                    pass

                # Get the bands occupations and correct the occupations of QE:
                # If it computes only one component, it occupies it with half number of electrons
                try:
                    bands_data['occupations'][1]
                    the_occupations = bands_data['occupations']
                except IndexError:
                    the_occupations = 2.*numpy.array(bands_data['occupations'][0])

                try:
                    bands_data['bands'][1]
                    bands_energies = bands_data['bands']
                except IndexError:
                    bands_energies = bands_data['bands'][0]

                the_bands_data = BandsData()
                the_bands_data.set_kpointsdata(kpoints_for_bands)
                the_bands_data.set_bands(bands_energies,
                                         units = bands_data['bands_units'],
                                         occupations = the_occupations)

                new_nodes_list += [('output_band', the_bands_data)]
                out_dict['linknames_band'] = ['output_band']

        # Separate the atomic_occupations dictionary in its own node if it is present
        atomic_occupations = out_dict.get('atomic_occupations', {})
        if atomic_occupations:
            out_dict.pop('atomic_occupations')
            atomic_occupations_node = ParameterData(dict=atomic_occupations)
            new_nodes_list.append(('output_atomic_occupations', atomic_occupations_node))

        output_params = ParameterData(dict=out_dict)
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        if trajectory_data:
            from aiida.orm.data.array.trajectory import TrajectoryData
            from aiida.orm.data.array import ArrayData
            try:
                positions = numpy.array( trajectory_data.pop('atomic_positions_relax'))
                try:
                    cells = numpy.array( trajectory_data.pop('lattice_vectors_relax'))
                    # if KeyError, the MD was at fixed cell
                except KeyError:
                    cells = numpy.array([in_struc.cell] * len(positions))

                symbols = numpy.array([str(i.kind_name) for i in in_struc.sites])
                stepids = numpy.arange(len(positions)) # a growing integer per step
                # I will insert time parsing when they fix their issues about time
                # printing (logic is broken if restart is on)

                traj = TrajectoryData()
                traj.set_trajectory(
                    stepids = stepids,
                    cells = cells,
                    symbols = symbols,
                    positions = positions,
                )
                for x in trajectory_data.iteritems():
                    traj.set_array(x[0],numpy.array(x[1]))
                new_nodes_list.append((self.get_linkname_outtrajectory(),traj))

            except KeyError:
                # forces, atomic charges and atomic mag. moments, in scf calculation (when outputed)
                arraydata = ArrayData()
                for x in trajectory_data.iteritems():
                    arraydata.set_array(x[0],numpy.array(x[1]))
                new_nodes_list.append((self.get_linkname_outarray(),arraydata))

        return successful, new_nodes_list
Esempio n. 3
0
    def parse_with_retrieved(self, retrieved):
        """
        Parses the datafolder, stores results.
        This parser for this simple code does simply store in the DB a node
        representing the file of phonon frequencies
        """
        from aiida.common.exceptions import InvalidOperation

        # suppose at the start that the job is successful
        successful = True
        new_nodes_list = []

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()
        # at least the stdout should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            successful = False
            self.logger.error("Standard output not found")
            return successful, ()

        # check that the file has finished (i.e. JOB DONE is inside the file)
        filpath = out_folder.get_abs_path(self._calc._OUTPUT_FILE_NAME)
        with open(filpath, 'r') as fil:
            lines = fil.read()
        if "JOB DONE" not in lines:
            successful = False
            self.logger.error("Computation did not finish properly")

        # check that the phonon frequencies file is present
        try:
            # define phonon frequencies file name
            phonon_file = out_folder.get_abs_path(
                self._calc._PHONON_FREQUENCIES_NAME)
        except OSError:
            successful = False
            self.logger.error("File with phonon frequencies not found")
            return successful, new_nodes_list

        # extract the kpoints from the input data and create the kpointsdata for bands
        kpointsdata = self._calc.inp.kpoints
        try:
            kpoints = kpointsdata.get_kpoints()
            kpointsdata_for_bands = kpointsdata.copy()
        except AttributeError:
            kpoints = kpointsdata.get_kpoints_mesh(print_list=True)
            kpointsdata_for_bands = KpointsData()
            kpointsdata_for_bands.set_kpoints(kpoints)
        # find the number of kpoints
        num_kpoints = kpoints.shape[0]

        # call the raw parsing function
        parsed_data = parse_raw_matdyn_phonon_file(phonon_file)

        # extract number of kpoints read from the file (and take out from output
        # dictionary)
        try:
            this_num_kpoints = parsed_data.pop('num_kpoints')
        except KeyError:
            successful = False
            self.logger.error("Wrong number of kpoints")
            # warning message already in parsed_data
            return successful, new_nodes_list

        # check that the number of kpoints from the file is the same as the one
        # in the input kpoints
        if num_kpoints != this_num_kpoints:
            successful = False
            self.logger.error("Number of kpoints different in input and in "
                              "phonon frequencies file")

        # extract phonon bands (and take out from output dictionary)
        phonon_bands = parsed_data.pop('phonon_bands')

        # save phonon branches into BandsData
        output_bands = BandsData()
        output_bands.set_kpointsdata(kpointsdata_for_bands)
        output_bands.set_bands(phonon_bands, units='THz')

        # convert the dictionary into an AiiDA object (here only warnings remain)
        output_params = ParameterData(dict=parsed_data)

        for message in parsed_data['warnings']:
            self.logger.error(message)

        # prepare the list of output nodes to be returned
        new_nodes_list = [(self.get_linkname_outparams(), output_params),
                          (self.get_linkname_outbands(), output_bands)]

        return successful, new_nodes_list
Esempio n. 4
0
    def parse_with_retrieved(self, retrieved):
        """
        Receives in input a dictionary of retrieved nodes.
        Does all the logic here.
        """
        from aiida.common.exceptions import InvalidOperation
        import os
        import glob

        successful = True

        # check if I'm not to overwrite anything
        #state = self._calc.get_state()
        #if state != calc_states.PARSING:
        #    raise InvalidOperation("Calculation not in {} state"
        #                           .format(calc_states.PARSING) )

        # look for eventual flags of the parser
        try:
            parser_opts = self._calc.inp.settings.get_dict()[
                self.get_parser_settings_key()]
        except (AttributeError, KeyError):
            parser_opts = {}

        # load the input dictionary
        # TODO: pass this input_dict to the parser. It might need it.
        input_dict = self._calc.inp.parameters.get_dict()

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()
        # at least the stdout should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            self.logger.error("Standard output not found")
            successful = False
            return successful, ()
        # if there is something more, I note it down, so to call the raw parser
        # with the right options
        # look for xml
        has_xml = False
        if self._calc._DATAFILE_XML_BASENAME in list_of_files:
            has_xml = True
        # look for bands
        has_bands = False
        if glob.glob(os.path.join(out_folder.get_abs_path('.'), 'K*[0-9]')):
            # Note: assuming format of kpoints subfolder is K*[0-9]
            has_bands = True
            # TODO: maybe it can be more general than bands only?
        out_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._OUTPUT_FILE_NAME)
        xml_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._DATAFILE_XML_BASENAME)
        dir_with_bands = out_folder.get_abs_path('.')

        # call the raw parsing function
        parsing_args = [out_file, input_dict, parser_opts]
        if has_xml:
            parsing_args.append(xml_file)
        if has_bands:
            if not has_xml:
                self.logger.warning("Cannot parse bands if xml file not "
                                    "found")
            else:
                parsing_args.append(dir_with_bands)

        out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(
            *parsing_args)

        # if calculation was not considered failed already, use the new value
        successful = raw_successful if successful else successful

        # The symmetry info has large arrays, that occupy most of the database.
        # turns out most of this is due to 64 matrices that are repeated over and over again.
        # therefore I map part of the results in a list of dictionaries wrote here once and for all
        # if the parser_opts has a key all_symmetries set to True, I don't reduce it
        all_symmetries = parser_opts.get('all_symmetries', False)
        if not all_symmetries:
            try:
                if 'symmetries' in out_dict.keys():
                    old_symmetries = out_dict['symmetries']
                    new_symmetries = []
                    for this_sym in old_symmetries:
                        name = this_sym['name']
                        index = None
                        for i, this in enumerate(self._possible_symmetries):
                            if name in this['name']:
                                index = i
                        if index is None:
                            self.logger.error(
                                "Symmetry {} not found".format(name))
                        new_dict = {}
                        # note: here I lose the information about equivalent
                        # ions and fractional_translation.
                        # They will be present with all_symmetries=True
                        new_dict['t_rev'] = this_sym['t_rev']
                        new_dict['symmetry_number'] = index
                        new_symmetries.append(new_dict)
                    out_dict[
                        'symmetries'] = new_symmetries  # and overwrite the old one
            except KeyError:  # no symmetries were parsed (failed case, likely)
                self.logger.error("No symmetries were found in output")

        new_nodes_list = []

        # I eventually save the new structure. structure_data is unnecessary after this
        in_struc = self._calc.get_inputs_dict()['structure']
        type_calc = input_dict['CONTROL']['calculation']
        struc = in_struc
        if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
            if 'cell' in structure_data.keys():
                struc = convert_qe2aiida_structure(structure_data,
                                                   input_structure=in_struc)
                new_nodes_list.append(
                    (self.get_linkname_outstructure(), struc))

        k_points_list = trajectory_data.pop('k_points', None)
        k_points_weights_list = trajectory_data.pop('k_points_weights', None)

        if k_points_list is not None:

            # build the kpoints object
            if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
                raise QEOutputParsingError(
                    'Error in kpoints units (should be cartesian)')
            # converting bands into a BandsData object (including the kpoints)

            kpoints_from_output = KpointsData()
            kpoints_from_output.set_cell_from_structure(struc)
            kpoints_from_output.set_kpoints(k_points_list,
                                            cartesian=True,
                                            weights=k_points_weights_list)
            kpoints_from_input = self._calc.inp.kpoints

            if not bands_data:
                try:
                    kpoints_from_input.get_kpoints()
                except AttributeError:
                    new_nodes_list += [(self.get_linkname_out_kpoints(),
                                        kpoints_from_output)]

            if bands_data:
                import numpy
                # converting bands into a BandsData object (including the kpoints)

                kpoints_for_bands = kpoints_from_output

                try:
                    kpoints_from_input.get_kpoints()
                    kpoints_for_bands.labels = kpoints_from_input.labels
                except (AttributeError, ValueError, TypeError):
                    # AttributeError: no list of kpoints in input
                    # ValueError: labels from input do not match the output
                    #      list of kpoints (some kpoints are missing)
                    # TypeError: labels are not set, so kpoints_from_input.labels=None
                    pass

                # get the bands occupations.
                # correct the occupations of QE: if it computes only one component,
                # it occupies it with half number of electrons
                try:
                    bands_data['occupations'][1]
                    the_occupations = bands_data['occupations']
                except IndexError:
                    the_occupations = 2. * numpy.array(
                        bands_data['occupations'][0])

                try:
                    bands_data['bands'][1]
                    bands_energies = bands_data['bands']
                except IndexError:
                    bands_energies = bands_data['bands'][0]

                the_bands_data = BandsData()
                the_bands_data.set_kpointsdata(kpoints_for_bands)
                the_bands_data.set_bands(bands_energies,
                                         units=bands_data['bands_units'],
                                         occupations=the_occupations)

                new_nodes_list += [('output_band', the_bands_data)]
                out_dict['linknames_band'] = ['output_band']

        # convert the dictionary into an AiiDA object
        output_params = ParameterData(dict=out_dict)
        # return it to the execmanager
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        if trajectory_data:
            import numpy
            from aiida.orm.data.array.trajectory import TrajectoryData
            from aiida.orm.data.array import ArrayData
            try:
                positions = numpy.array(
                    trajectory_data.pop('atomic_positions_relax'))
                try:
                    cells = numpy.array(
                        trajectory_data.pop('lattice_vectors_relax'))
                    # if KeyError, the MD was at fixed cell
                except KeyError:
                    cells = numpy.array([in_struc.cell] * len(positions))

                symbols = numpy.array(
                    [str(i.kind_name) for i in in_struc.sites])
                stepids = numpy.arange(
                    len(positions))  # a growing integer per step
                # I will insert time parsing when they fix their issues about time
                # printing (logic is broken if restart is on)

                traj = TrajectoryData()
                traj.set_trajectory(
                    stepids=stepids,
                    cells=cells,
                    symbols=symbols,
                    positions=positions,
                )
                for x in trajectory_data.iteritems():
                    traj.set_array(x[0], numpy.array(x[1]))
                # return it to the execmanager
                new_nodes_list.append(
                    (self.get_linkname_outtrajectory(), traj))

            except KeyError:
                # forces, atomic charges and atomic mag. moments, in scf
                # calculation (when outputed)
                arraydata = ArrayData()
                for x in trajectory_data.iteritems():
                    arraydata.set_array(x[0], numpy.array(x[1]))
                # return it to the execmanager
                new_nodes_list.append(
                    (self.get_linkname_outarray(), arraydata))

        return successful, new_nodes_list
Esempio n. 5
0
 def _aiida_bands_data(self, data, cell, kpoints_dict):
     if not data:
         return False
     kpt_idx = sorted(data.keys())  #  list of kpoint indices
     try:
         k_list = [kpoints_dict[i]
                   for i in kpt_idx]  # list of k-point triplet
     except KeyError:
         # kpoint triplets are not present (true  for .qp and so on, can not use BandsData)
         # We use the internal Yambo Format  [ [Eo_1, Eo_2,... ], ...[So_1,So_2,] ]
         #                                  QP_TABLE  [[ib_1,ik_1,isp_1]      ,[ib_n,ik_n,isp_n]]
         # Each entry in DATA has corresponding legend in QP_TABLE that defines its details
         # like   ib= Band index,  ik= kpoint index,  isp= spin polarization index.
         #  Eo_1 =>  at ib_1, ik_1 isp_1.
         pdata = ArrayData()
         QP_TABLE = []
         ORD = []
         Eo = []
         E_minus_Eo = []
         So = []
         Z = []
         for ky in data.keys():  # kp == kpoint index as a string  1,2,..
             for ind in range(len(data[ky]['Band'])):
                 try:
                     Eo.append(data[ky]['Eo'][ind])
                 except KeyError:
                     pass
                 try:
                     E_minus_Eo.append(data[ky]['E-Eo'][ind])
                 except KeyError:
                     pass
                 try:
                     So.append(data[ky]['Sc|Eo'][ind])
                 except KeyError:
                     pass
                 try:
                     Z.append(data[ky]['Z'][ind])
                 except KeyError:
                     pass
                 ik = int(ky)
                 ib = data[ky]['Band'][ind]
                 isp = 0
                 if 'Spin_Pol' in data[ky].keys():
                     isp = data[ky]['Spin_Pol'][ind]
                 QP_TABLE.append([ik, ib, isp])
         pdata.set_array('Eo', numpy.array(Eo))
         pdata.set_array('E_minus_Eo', numpy.array(E_minus_Eo))
         pdata.set_array('So', numpy.array(So))
         pdata.set_array('Z', numpy.array(Z))
         pdata.set_array('qp_table', numpy.array(QP_TABLE))
         return pdata
     quasiparticle_bands = BandsData()
     quasiparticle_bands.set_cell(cell)
     quasiparticle_bands.set_kpoints(k_list, cartesian=True)
     # labels will come from any of the keys in the nested  kp_point data,
     # there is a uniform set of observables for each k-point, ie Band, Eo, ...
     # ***FIXME BUG does not seem to handle spin polarizes at all when constructing bandsdata***
     bands_labels = [
         legend for legend in sorted(data[data.keys()[0]].keys())
     ]
     append_list = [[] for i in bands_labels]
     for kp in kpt_idx:
         for i in range(len(bands_labels)):
             append_list[i].append(data[kp][bands_labels[i]])
     generalised_bands = [numpy.array(it) for it in append_list]
     quasiparticle_bands.set_bands(bands=generalised_bands,
                                   units='eV',
                                   labels=bands_labels)
     return quasiparticle_bands
Esempio n. 6
0
    def get_bands_and_parents_structure(self, args):
        """
        Returns bands and closest parent structure     
        """
        from collections import defaultdict
        from django.db.models import Q
        from aiida.common.utils import grouper
        from aiida.backends.djsite.db import models
        from aiida.backends.utils import get_automatic_user

        from aiida.orm.data.structure import (get_formula, get_symbols_string)
        from aiida.orm.data.array.bands import BandsData

        query_group_size = 100
        q_object = None
        if args.all_users is False:
            q_object = Q(user=get_automatic_user())
        else:
            q_object = Q()

        self.query_past_days(q_object, args)
        self.query_group(q_object, args)

        bands_list = BandsData.query(q_object).distinct().order_by('ctime')

        bands_list_data = bands_list.values_list('pk', 'label', 'ctime')

        # split data in chunks
        grouped_bands_list_data = grouper(query_group_size,
                                          [(_[0], _[1], _[2])
                                           for _ in bands_list_data])

        entry_list = []
        for this_chunk in grouped_bands_list_data:
            # gather all banddata pks
            pks = [_[0] for _ in this_chunk]

            # get the closest structures (WITHOUT DbPath)
            q_object = Q(type='data.structure.StructureData.')
            structure_dict = get_closest_parents(pks, q_object, chunk_size=1)

            struc_pks = [structure_dict[pk] for pk in pks]

            # query for the attributes needed for the structure formula
            attr_query = Q(key__startswith='kinds') | Q(
                key__startswith='sites')
            attrs = models.DbAttribute.objects.filter(
                attr_query,
                dbnode__in=struc_pks).values_list('dbnode__pk', 'key',
                                                  'datatype', 'tval', 'fval',
                                                  'ival', 'bval', 'dval')

            results = defaultdict(dict)
            for attr in attrs:
                results[attr[0]][attr[1]] = {
                    "datatype": attr[2],
                    "tval": attr[3],
                    "fval": attr[4],
                    "ival": attr[5],
                    "bval": attr[6],
                    "dval": attr[7]
                }
            # organize all of it in a dictionary
            deser_data = {}
            for k in results:
                deser_data[k] = models.deserialize_attributes(
                    results[k], sep=models.DbAttribute._sep)

            # prepare the printout
            for ((bid, blabel, bdate), struc_pk) in zip(this_chunk, struc_pks):
                if struc_pk is not None:
                    # Exclude structures by the elements
                    if args.element is not None:
                        all_kinds = [
                            k['symbols'] for k in deser_data[struc_pk]['kinds']
                        ]
                        all_symbols = [
                            item for sublist in all_kinds for item in sublist
                        ]
                        if not any([s in args.element for s in all_symbols]):
                            continue
                    if args.element_only is not None:
                        all_kinds = [
                            k['symbols'] for k in deser_data[struc_pk]['kinds']
                        ]
                        all_symbols = [
                            item for sublist in all_kinds for item in sublist
                        ]
                        if not all(
                            [s in all_symbols for s in args.element_only]):
                            continue

                    # build the formula
                    symbol_dict = {
                        k['name']: get_symbols_string(k['symbols'],
                                                      k['weights'])
                        for k in deser_data[struc_pk]['kinds']
                    }
                    try:
                        symbol_list = [
                            symbol_dict[s['kind_name']]
                            for s in deser_data[struc_pk]['sites']
                        ]
                        formula = get_formula(symbol_list,
                                              mode=args.formulamode)
                    # If for some reason there is no kind with the name
                    # referenced by the site
                    except KeyError:
                        formula = "<<UNKNOWN>>"
                        # cycle if we imposed the filter on elements
                        if args.element is not None or args.element_only is not None:
                            continue
                else:
                    formula = "<<UNKNOWN>>"
                entry_list.append([
                    str(bid),
                    str(formula),
                    bdate.strftime('%d %b %Y'), blabel
                ])

        return entry_list
Esempio n. 7
0
def spin_dependent_subparcer(out_info_dict):
    """
    This find the projection and bands arrays from the out_file and
    out_info_dict. Used to handle the different possible spin-cases in
    a convenient manner.

    :param out_info_dict: contains various technical internals useful in parsing
    :return: ProjectionData, BandsData parsed from out_file
    """

    out_file = out_info_dict["out_file"]
    spin_down = out_info_dict["spin_down"]
    od = out_info_dict #using a shorter name for convenience
    #   regular expressions needed for later parsing
    WaveFraction1_re = re.compile(r"\=(.*?)\*")  # state composition 1
    WaveFractionremain_re = re.compile(r"\+(.*?)\*")  # state comp 2
    FunctionId_re = re.compile(r"\#(.*?)\]")  # state identity
    # primes arrays for the later parsing
    num_wfc = len(od["wfc_lines"])
    bands = np.zeros([od["k_states"], od["num_bands"]])
    projection_arrays = np.zeros([od["k_states"], od["num_bands"], num_wfc])

    try:
        for i in range(od["k_states"]):
            if spin_down:
                i += od["k_states"]
            # grabs band energy
            for j in range (i*od["num_bands"],(i+1)*od["num_bands"],1):
                out_ind = od["e_lines"][j]
                val = out_file[out_ind].split()[4]
                bands[i%od["k_states"]][j%od["num_bands"]] = val
                #subloop grabs pdos
                wave_fraction = []
                wave_id = []
                for k in range(od["e_lines"][j]+1,od["psi_lines"][j],1):
                    out_line = out_file[k]
                    wave_fraction += WaveFraction1_re.findall(out_line)
                    wave_fraction += WaveFractionremain_re.findall(out_line)
                    wave_id += FunctionId_re.findall(out_line)
                if len(wave_id) != len(wave_fraction):
                    raise IndexError
                for l in range (len(wave_id)):
                    wave_id[l] = int(wave_id[l])
                    wave_fraction[l] = float(wave_fraction[l])
                    #sets relevant values in pdos_array
                    projection_arrays[i%od["k_states"]][
                        j%od["num_bands"]][wave_id[l]-1] = wave_fraction[l]
    except IndexError:
        raise QEOutputParsingError("the standard out file does not "
                                   "comply with the official "
                                   "documentation.")

    bands_data = BandsData()
    try:
    # Attempts to retrive the kpoints from the parent calc
        parent_calc = out_info_dict["parent_calc"]
        parent_kpoints = parent_calc.get_inputs_dict()['kpoints']
        if len(od['k_vect']) != len(parent_kpoints.get_kpoints()):
            raise AttributeError
        bands_data.set_kpointsdata(parent_kpoints)
    except AttributeError:
        bands_data.set_kpoints(od['k_vect'].astype(float))

    bands_data.set_bands(bands, units='eV')

    orbitals = out_info_dict["orbitals"]
    if len(orbitals) != np.shape(projection_arrays[0,0,:])[0]:
        raise QEOutputParsingError("There was an internal parsing error, "
                                   " the projection array shape does not agree"
                                   " with the number of orbitals")
    projection_data = ProjectionData()
    projection_data.set_reference_bandsdata(bands_data)
    projections = [projection_arrays[:,:,i] for i in range(len(orbitals))]

    # Do the bands_check manually here
    for projection in projections:
        if np.shape(projection) !=  np.shape(bands):
            raise AttributeError("Projections not the same shape as the bands")


    #insert here some logic to assign pdos to the orbitals
    pdos_arrays = spin_dependent_pdos_subparcer(out_info_dict)
    energy_arrays = [out_info_dict["energy"]]*len(orbitals)
    projection_data.set_projectiondata(orbitals,
                                       list_of_projections=projections,
                                       list_of_energy=energy_arrays,
                                       list_of_pdos=pdos_arrays,
                                       bands_check=False)
    # pdos=pdos_arrays
    return bands_data,  projection_data
Esempio n. 8
0
def band_parser(band_dat_path, band_kpt_path, special_points, structure):
    """
    Parsers the bands output data, along with the special points retrieved
    from the input kpoints to construct a BandsData object which is then
    returned. Cannot handle discontinuities in the kpath, if two points are
    assigned to same spot only one will be passed.

    :param band_dat_path: file path to the aiida_band.dat file
    :param band_kpt_path: file path to the aiida_band.kpt file
    :param special_points: special points to add labels to the bands a dictionary in
        the form expected in the input as described in the wannier90 documentation
    :return: BandsData object constructed from the input params
    """
    import numpy as np
    from aiida.orm.data.array.bands import BandsData
    from aiida.orm.data.array.kpoints import KpointsData

    # imports the data
    out_kpt = np.genfromtxt(band_kpt_path, skip_header=1, usecols=(0, 1, 2))
    out_dat = np.genfromtxt(band_dat_path, usecols=1)

    # reshaps the output bands
    out_dat = out_dat.reshape(len(out_kpt), (len(out_dat) / len(out_kpt)),
                              order="F")

    # finds expected points of discontinuity
    kpath = special_points['path']
    cont_break = [(i, (kpath[i - 1][1], kpath[i][0]))
                  for i in range(1, len(kpath))
                  if kpath[i - 1][1] != kpath[i][0]]

    # finds the special points
    special_points_dict = special_points['point_coords']
    labels = [(i, k) for k in special_points_dict for i in range(len(out_kpt))
              if all(np.isclose(special_points_dict[k], out_kpt[i]))]
    labels.sort()

    # Checks and appends labels if discontinuity
    appends = []
    for x in cont_break:
        # two cases the break is before or the break is after
        # if the break is before
        if labels[x[0]][1] != x[1][0]:
            # checks to see if the discontinuity was already there
            if labels[x[0] - 1] == x[1][0]:
                continue
            else:
                insert_point = x[0]
                new_label = x[1][0]
                kpoint = labels[x[0]][0] - 1
                appends += [[insert_point, new_label, kpoint]]
        # if the break is after
        if labels[x[0]][1] != x[1][1]:
            # checks to see if the discontinuity was already there
            if labels[x[0] + 1] == x[1][1]:
                continue
            else:
                insert_point = x[0] + 1
                new_label = x[1][1]
                kpoint = labels[x[0]][0] + 1
                appends += [[insert_point, new_label, kpoint]]
    appends.sort()
    for i in range(len(appends)):
        append = appends[i]
        labels.insert(append[0] + i, (append[2], unicode(append[1])))
    bands = BandsData()
    k = KpointsData()
    k.set_cell_from_structure(structure)
    k.set_kpoints(out_kpt, cartesian=False)
    bands.set_kpointsdata(k)
    bands.set_bands(out_dat, units='eV')
    bands.labels = labels
    return bands
Esempio n. 9
0
    def parse_with_retrieved(self, retrieved):
        """
        Parse the output nodes for a PwCalculations from a dictionary of retrieved nodes.
        Two nodes that are expected are the default 'retrieved' FolderData node which will
        store the retrieved files permanently in the repository. The second required node
        is the unstored FolderData node with the temporary retrieved files, which should
        be passed under the key 'retrieved_temporary_folder_key' of the Parser class.

        :param retrieved: a dictionary of retrieved nodes
        """
        import os

        successful = True

        # Load the input dictionary
        parameters = self._calc.inp.parameters.get_dict()

        # Look for optional settings input node and potential 'parser_options' dictionary within it
        try:
            settings = self._calc.inp.settings.get_dict()
            parser_opts = settings[self.get_parser_settings_key()]
        except (AttributeError, KeyError):
            settings = {}
            parser_opts = {}

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # Verify that the retrieved_temporary_folder is within the arguments if temporary files were specified
        if self._calc._get_retrieve_temporary_list():
            try:
                temporary_folder = retrieved[
                    self.retrieved_temporary_folder_key]
                dir_with_bands = temporary_folder.get_abs_path('.')
            except KeyError:
                self.logger.error(
                    'the {} was not passed as an argument'.format(
                        self.retrieved_temporary_folder_key))
                return False, ()
        else:
            dir_with_bands = None

        list_of_files = out_folder.get_folder_list()

        # The stdout is required for parsing
        if self._calc._OUTPUT_FILE_NAME not in list_of_files:
            self.logger.error(
                "The standard output file '{}' was not found but is required".
                format(self._calc._OUTPUT_FILE_NAME))
            return False, ()

        # The xml file is required for parsing
        if self._calc._DATAFILE_XML_BASENAME not in list_of_files:
            self.logger.error(
                "The xml output file '{}' was not found but is required".
                format(self._calc._DATAFILE_XML_BASENAME))
            successful = False
            xml_file = None
        else:
            xml_file = os.path.join(out_folder.get_abs_path('.'),
                                    self._calc._DATAFILE_XML_BASENAME)

        out_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._OUTPUT_FILE_NAME)

        # Call the raw parsing function
        parsing_args = [
            out_file, parameters, parser_opts, xml_file, dir_with_bands
        ]
        out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(
            *parsing_args)

        # If calculation was not considered failed already, use the new value
        successful = raw_successful if successful else successful

        # If the parser option 'all_symmetries' is not set to True, we reduce the raw parsed symmetries to safe space
        all_symmetries = parser_opts.get('all_symmetries', False)

        if not all_symmetries:

            # In the standard output, each symmetry operation print two rotation matrices:
            #
            # * S_cryst^T: matrix in crystal coordinates, transposed
            # * S_cart: matrix in cartesian coordinates,
            #
            # The XML files only print one matrix:
            #
            # * S_cryst: matrix in crystal coordinates
            #
            # The raw parsed symmetry information from the XML is large and will load the database heavily if stored as
            # is for each calculation. Instead, we will map these dictionaries onto a static dictionary of rotation
            # matrices generated by the _get_qe_symmetry_list static method. This dictionary will return the rotation
            # matrices in cartesian coordinates, i.e. S_cart. In order to compare the raw matrices from the XML to these
            # static matrices we have to convert S_cryst into S_cart. We derive here how that is done:
            #
            #   S_cryst * v_cryst = v_cryst'
            #
            # where v_cryst' is the rotated vector v_cryst under S_cryst
            # We define `cell` where cell vectors are rows. Converting a vector from crystal to cartesian
            # coordinates is defined as:
            #
            #   cell^T * v_cryst = v_cart
            #
            # The inverse of this operation is defined as
            #
            #   v_cryst = cell^Tinv * v_cart
            #
            # Replacing the last equation into the first we find:
            #
            #   S_cryst * cell^Tinv * v_cart = cell^Tinv * v_cart'
            #
            # Multiply on the left with cell^T gives:
            #
            #   cell^T * S_cryst * cell^Tinv * v_cart = v_cart'
            #
            # which can be rewritten as:
            #
            #   S_cart * v_cart = v_cart'
            #
            # where:
            #
            #   S_cart = cell^T * S_cryst * cell^Tinv
            #
            # We compute here the transpose and its inverse of the structure cell basis, which is needed to transform
            # the parsed rotation matrices, which are in crystal coordinates, to cartesian coordinates, which are the
            # matrices that are returned by the _get_qe_symmetry_list staticmethod
            cell = structure_data['cell']['lattice_vectors']
            cell_T = numpy.transpose(cell)
            cell_Tinv = numpy.linalg.inv(cell_T)

            try:
                if 'symmetries' in out_dict.keys():
                    old_symmetries = out_dict['symmetries']
                    new_symmetries = []
                    for this_sym in old_symmetries:
                        name = this_sym['name'].strip()
                        for i, this in enumerate(self._possible_symmetries):
                            # Since we do an exact comparison we strip the string name from whitespace
                            # and as soon as it is matched, we break to prevent it from matching another
                            if name == this['name'].strip():
                                index = i
                                break
                        else:
                            index = None
                            self.logger.error(
                                'Symmetry {} not found'.format(name))

                        new_dict = {}
                        if index is not None:
                            # The raw parsed rotation matrix is in crystal coordinates, whereas the mapped rotation
                            # in self._possible_symmetries is in cartesian coordinates. To allow them to be compared
                            # to make sure we matched the correct rotation symmetry, we first convert the parsed matrix
                            # to cartesian coordinates. For explanation of the method, see comment above.
                            rotation_cryst = this_sym['rotation']
                            rotation_cart_new = self._possible_symmetries[
                                index]['matrix']
                            rotation_cart_old = numpy.dot(
                                cell_T, numpy.dot(rotation_cryst, cell_Tinv))

                            inversion = self._possible_symmetries[index][
                                'inversion']
                            if not are_matrices_equal(
                                    rotation_cart_old,
                                    rotation_cart_new,
                                    swap_sign_matrix_b=inversion):
                                self.logger.error(
                                    'Mapped rotation matrix {} does not match the original rotation {}'
                                    .format(rotation_cart_new,
                                            rotation_cart_old))
                                new_dict['all_symmetries'] = this_sym
                            else:
                                # Note: here I lose the information about equivalent ions and fractional_translation.
                                new_dict['t_rev'] = this_sym['t_rev']
                                new_dict['symmetry_number'] = index
                        else:
                            new_dict['all_symmetries'] = this_sym

                        new_symmetries.append(new_dict)

                    out_dict[
                        'symmetries'] = new_symmetries  # and overwrite the old one
            except KeyError:  # no symmetries were parsed (failed case, likely)
                self.logger.error("No symmetries were found in output")

        new_nodes_list = []

        # I eventually save the new structure. structure_data is unnecessary after this
        in_struc = self._calc.get_inputs_dict()['structure']
        type_calc = parameters['CONTROL']['calculation']
        struc = in_struc
        if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
            if 'cell' in structure_data.keys():
                struc = convert_qe2aiida_structure(structure_data,
                                                   input_structure=in_struc)
                new_nodes_list.append(
                    (self.get_linkname_outstructure(), struc))

        k_points_list = trajectory_data.pop('k_points', None)
        k_points_weights_list = trajectory_data.pop('k_points_weights', None)

        if k_points_list is not None:

            # Build the kpoints object
            if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
                raise QEOutputParsingError(
                    'Error in kpoints units (should be cartesian)')

            kpoints_from_output = KpointsData()
            kpoints_from_output.set_cell_from_structure(struc)
            kpoints_from_output.set_kpoints(k_points_list,
                                            cartesian=True,
                                            weights=k_points_weights_list)
            kpoints_from_input = self._calc.inp.kpoints

            if not bands_data:
                try:
                    kpoints_from_input.get_kpoints()
                except AttributeError:
                    new_nodes_list += [(self.get_linkname_out_kpoints(),
                                        kpoints_from_output)]

            # Converting bands into a BandsData object (including the kpoints)
            if bands_data:
                kpoints_for_bands = kpoints_from_output

                try:
                    kpoints_from_input.get_kpoints()
                    kpoints_for_bands.labels = kpoints_from_input.labels
                except (AttributeError, ValueError, TypeError):
                    # AttributeError: no list of kpoints in input
                    # ValueError: labels from input do not match the output
                    #      list of kpoints (some kpoints are missing)
                    # TypeError: labels are not set, so kpoints_from_input.labels=None
                    pass

                # Get the bands occupations and correct the occupations of QE:
                # If it computes only one component, it occupies it with half number of electrons
                try:
                    bands_data['occupations'][1]
                    the_occupations = bands_data['occupations']
                except IndexError:
                    the_occupations = 2. * numpy.array(
                        bands_data['occupations'][0])

                try:
                    bands_data['bands'][1]
                    bands_energies = bands_data['bands']
                except IndexError:
                    bands_energies = bands_data['bands'][0]

                the_bands_data = BandsData()
                the_bands_data.set_kpointsdata(kpoints_for_bands)
                the_bands_data.set_bands(bands_energies,
                                         units=bands_data['bands_units'],
                                         occupations=the_occupations)

                new_nodes_list += [('output_band', the_bands_data)]
                out_dict['linknames_band'] = ['output_band']

        # Separate the atomic_occupations dictionary in its own node if it is present
        atomic_occupations = out_dict.get('atomic_occupations', {})
        if atomic_occupations:
            out_dict.pop('atomic_occupations')
            atomic_occupations_node = ParameterData(dict=atomic_occupations)
            new_nodes_list.append(
                ('output_atomic_occupations', atomic_occupations_node))

        output_params = ParameterData(dict=out_dict)
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        if trajectory_data:
            from aiida.orm.data.array.trajectory import TrajectoryData
            from aiida.orm.data.array import ArrayData
            try:
                positions = numpy.array(
                    trajectory_data.pop('atomic_positions_relax'))
                try:
                    cells = numpy.array(
                        trajectory_data.pop('lattice_vectors_relax'))
                    # if KeyError, the MD was at fixed cell
                except KeyError:
                    cells = numpy.array([in_struc.cell] * len(positions))

                symbols = numpy.array(
                    [str(i.kind_name) for i in in_struc.sites])
                stepids = numpy.arange(
                    len(positions))  # a growing integer per step
                # I will insert time parsing when they fix their issues about time
                # printing (logic is broken if restart is on)

                traj = TrajectoryData()
                traj.set_trajectory(
                    stepids=stepids,
                    cells=cells,
                    symbols=symbols,
                    positions=positions,
                )
                for x in trajectory_data.iteritems():
                    traj.set_array(x[0], numpy.array(x[1]))
                new_nodes_list.append(
                    (self.get_linkname_outtrajectory(), traj))

            except KeyError:
                # forces, atomic charges and atomic mag. moments, in scf calculation (when outputed)
                arraydata = ArrayData()
                for x in trajectory_data.iteritems():
                    arraydata.set_array(x[0], numpy.array(x[1]))
                new_nodes_list.append(
                    (self.get_linkname_outarray(), arraydata))

        return successful, new_nodes_list