Example #1
    def parse_with_retrieved(self, retrieved):
        """
        Receives in input a dictionary of retrieved nodes.
        Does all the logic here.
        """
        from aiida.common.exceptions import InvalidOperation
        import os
        import glob

        successful = True

        # check if I'm not to overwrite anything
        #state = self._calc.get_state()
        #if state != calc_states.PARSING:
        #    raise InvalidOperation("Calculation not in {} state"
        #                           .format(calc_states.PARSING) )

        # look for any optional flags passed to the parser
        try:
            parser_opts = self._calc.inp.settings.get_dict()[
                self.get_parser_settings_key()]
        except (AttributeError, KeyError):
            parser_opts = {}

        # load the input dictionary
        # TODO: pass this input_dict to the parser. It might need it.
        input_dict = self._calc.inp.parameters.get_dict()

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()
        # at least the stdout should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            self.logger.error("Standard output not found")
            successful = False
            return successful, ()
        # if there is anything more, note it down so as to call the raw parser
        # with the right options
        # look for xml
        has_xml = False
        if self._calc._DATAFILE_XML_BASENAME in list_of_files:
            has_xml = True
        # look for bands
        has_bands = False
        if glob.glob(os.path.join(out_folder.get_abs_path('.'), 'K*[0-9]')):
            # Note: assuming format of kpoints subfolder is K*[0-9]
            has_bands = True
            # TODO: maybe it can be more general than bands only?
        out_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._OUTPUT_FILE_NAME)
        xml_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._DATAFILE_XML_BASENAME)
        dir_with_bands = out_folder.get_abs_path('.')

        # call the raw parsing function
        parsing_args = [out_file, input_dict, parser_opts]
        if has_xml:
            parsing_args.append(xml_file)
        if has_bands:
            if not has_xml:
                self.logger.warning("Cannot parse bands if xml file not "
                                    "found")
            else:
                parsing_args.append(dir_with_bands)

        out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(
            *parsing_args)

        # if calculation was not considered failed already, use the new value
        successful = raw_successful if successful else successful

        # The symmetry info contains large arrays that occupy most of the database.
        # It turns out most of this is due to 64 matrices that are repeated over and over again,
        # so part of the results is mapped onto a list of dictionaries written here once and for all.
        # If parser_opts has the key all_symmetries set to True, the data is not reduced.
        all_symmetries = parser_opts.get('all_symmetries', False)
        if not all_symmetries:
            try:
                if 'symmetries' in out_dict.keys():
                    old_symmetries = out_dict['symmetries']
                    new_symmetries = []
                    for this_sym in old_symmetries:
                        name = this_sym['name']
                        index = None
                        for i, this in enumerate(self._possible_symmetries):
                            if name in this['name']:
                                index = i
                        if index is None:
                            self.logger.error(
                                "Symmetry {} not found".format(name))
                        new_dict = {}
                        # note: here I lose the information about equivalent
                        # ions and fractional_translation.
                        # They will be present with all_symmetries=True
                        new_dict['t_rev'] = this_sym['t_rev']
                        new_dict['symmetry_number'] = index
                        new_symmetries.append(new_dict)
                    out_dict[
                        'symmetries'] = new_symmetries  # and overwrite the old one
            except KeyError:  # no symmetries were parsed (failed case, likely)
                self.logger.error("No symmetries were found in output")

        new_nodes_list = []

        # save the new structure, if any; structure_data is not needed after this
        in_struc = self._calc.get_inputs_dict()['structure']
        type_calc = input_dict['CONTROL']['calculation']
        struc = in_struc
        if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
            if 'cell' in structure_data.keys():
                struc = convert_qe2aiida_structure(structure_data,
                                                   input_structure=in_struc)
                new_nodes_list.append(
                    (self.get_linkname_outstructure(), struc))

        k_points_list = trajectory_data.pop('k_points', None)
        k_points_weights_list = trajectory_data.pop('k_points_weights', None)

        if k_points_list is not None:

            # build the kpoints object
            if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
                raise QEOutputParsingError(
                    'Error in kpoints units (should be cartesian)')
            # converting bands into a BandsData object (including the kpoints)

            kpoints_from_output = KpointsData()
            kpoints_from_output.set_cell_from_structure(struc)
            kpoints_from_output.set_kpoints(k_points_list,
                                            cartesian=True,
                                            weights=k_points_weights_list)
            kpoints_from_input = self._calc.inp.kpoints

            if not bands_data:
                try:
                    kpoints_from_input.get_kpoints()
                except AttributeError:
                    new_nodes_list += [(self.get_linkname_out_kpoints(),
                                        kpoints_from_output)]

            if bands_data:
                import numpy
                # converting bands into a BandsData object (including the kpoints)

                kpoints_for_bands = kpoints_from_output

                try:
                    kpoints_from_input.get_kpoints()
                    kpoints_for_bands.labels = kpoints_from_input.labels
                except (AttributeError, ValueError, TypeError):
                    # AttributeError: no list of kpoints in input
                    # ValueError: labels from input do not match the output
                    #      list of kpoints (some kpoints are missing)
                    # TypeError: labels are not set, so kpoints_from_input.labels=None
                    pass

                # get the band occupations.
                # correct QE's convention: if only one spin component is computed,
                # it is occupied with half the number of electrons
                try:
                    bands_data['occupations'][1]
                    the_occupations = bands_data['occupations']
                except IndexError:
                    the_occupations = 2. * numpy.array(
                        bands_data['occupations'][0])

                try:
                    bands_data['bands'][1]
                    bands_energies = bands_data['bands']
                except IndexError:
                    bands_energies = bands_data['bands'][0]

                the_bands_data = BandsData()
                the_bands_data.set_kpointsdata(kpoints_for_bands)
                the_bands_data.set_bands(bands_energies,
                                         units=bands_data['bands_units'],
                                         occupations=the_occupations)

                new_nodes_list += [('output_band', the_bands_data)]
                out_dict['linknames_band'] = ['output_band']

        # convert the dictionary into an AiiDA object
        output_params = ParameterData(dict=out_dict)
        # return it to the execmanager
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        if trajectory_data:
            import numpy
            from aiida.orm.data.array.trajectory import TrajectoryData
            from aiida.orm.data.array import ArrayData
            try:
                positions = numpy.array(
                    trajectory_data.pop('atomic_positions_relax'))
                try:
                    cells = numpy.array(
                        trajectory_data.pop('lattice_vectors_relax'))
                    # if KeyError, the MD was at fixed cell
                except KeyError:
                    cells = numpy.array([in_struc.cell] * len(positions))

                symbols = numpy.array(
                    [str(i.kind_name) for i in in_struc.sites])
                stepids = numpy.arange(
                    len(positions))  # a growing integer per step
                # I will insert time parsing when they fix their issues about time
                # printing (logic is broken if restart is on)

                traj = TrajectoryData()
                traj.set_trajectory(
                    stepids=stepids,
                    cells=cells,
                    symbols=symbols,
                    positions=positions,
                )
                for x in trajectory_data.iteritems():
                    traj.set_array(x[0], numpy.array(x[1]))
                # return it to the execmanager
                new_nodes_list.append(
                    (self.get_linkname_outtrajectory(), traj))

            except KeyError:
                # forces, atomic charges and atomic magnetic moments from an scf
                # calculation (when printed in the output)
                arraydata = ArrayData()
                for x in trajectory_data.iteritems():
                    arraydata.set_array(x[0], numpy.array(x[1]))
                # return it to the execmanager
                new_nodes_list.append(
                    (self.get_linkname_outarray(), arraydata))

        return successful, new_nodes_list
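
The method returns a two-element tuple: a success flag and a list of (link_name, node) pairs that the calling machinery stores and links to the calculation. A minimal sketch of how a caller could consume that return value is shown below; the PwParser name, the calc and retrieved_folder variables and the store() call are assumptions made for illustration, not the actual AiiDA execmanager code.

    # Illustrative sketch only (assumed names: PwParser, calc, retrieved_folder).
    # It mirrors the return contract of parse_with_retrieved: (successful, new_nodes_list).
    retrieved = {calc._get_linkname_retrieved(): retrieved_folder}
    successful, new_nodes_list = PwParser(calc).parse_with_retrieved(retrieved)

    if not successful:
        print("parsing flagged the calculation as failed")

    for link_name, node in new_nodes_list:
        node.store()  # persist each output node before it gets linked to the calculation
        print("output node '{}': {}".format(link_name, node))
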
Example #2
    def parse_with_retrieved(self, retrieved):
        """
        Parses the calculation-output datafolder, and stores
        results.

        :param retrieved: a dictionary of retrieved nodes, where the keys
            are the link names of retrieved nodes, and the values are the
            nodes.
        """
        from aiida.common.exceptions import InvalidOperation
        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.orm.data.array import ArrayData
        import os
        import numpy
        import copy

        successful = True

        # check if I'm not to overwrite anything
        #state = self._calc.get_state()
        #if state != calc_states.PARSING:
        #    raise InvalidOperation("Calculation not in {} state"
        #                           .format(calc_states.PARSING) )

        # look for any optional flags passed to the parser
        try:
            parser_opts = self._calc.inp.settings.get_dict()[
                self.get_parser_settings_key()]
        except (AttributeError, KeyError):
            parser_opts = {}

        # load the pw input dictionary
        pw_input_dict = self._calc.inp.pw_parameters.get_dict()

        # load the neb input dictionary
        neb_input_dict = self._calc.inp.neb_parameters.get_dict()

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            successful = False
            return successful, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()
        # at least the stdout should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            self.logger.error("Standard output not found")
            successful = False
            return successful, ()

        out_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._OUTPUT_FILE_NAME)

        # First parse the Neb output
        neb_out_dict, iteration_data, raw_successful = parse_raw_output_neb(
            out_file, neb_input_dict)

        # if calculation was not considered failed already, use the new value
        successful = raw_successful if successful else successful

        # Retrieve the number of images
        try:
            num_images = neb_input_dict['num_of_images']
        except KeyError:
            try:
                num_images = neb_out_dict['num_of_images']
            except KeyError:
                self.logger.error(
                    "Impossible to understand the number of images")
                successful = False
                return successful, ()

        # Now parse the information from the single pw calculations for the different images
        image_data = {}
        positions = []
        cells = []
        for i in range(num_images):
            # look for xml and parse
            xml_file = os.path.join(out_folder.get_abs_path('.'),
                                    self._calc._PREFIX + '_{}'.format(i + 1),
                                    self._calc._PREFIX + '.save',
                                    self._calc._DATAFILE_XML_BASENAME)
            try:
                with open(xml_file, 'r') as f:
                    xml_lines = f.read()  # Note: read() and not readlines()
            except IOError:
                self.logger.error(
                    "No xml file found for image {} at {}".format(
                        i + 1, xml_file))
                successful = False
                return successful, ()
            xml_data, structure_dict, bands_data = parse_pw_xml_output(
                xml_lines)

            # convert the dictionary obtained from parsing the xml to an AiiDA StructureData
            structure_data = convert_qe2aiida_structure(structure_dict)

            # look for pw output and parse it
            pw_out_file = os.path.join(
                out_folder.get_abs_path('.'),
                self._calc._PREFIX + '_{}'.format(i + 1), 'PW.out')
            try:
                with open(pw_out_file, 'r') as f:
                    pw_out_lines = f.read()  # Note: read() and not readlines()
            except IOError:
                self.logger.error(
                    "No pw output file found for image {}".format(i + 1))
                successful = False
                return successful, ()

            pw_out_data, trajectory_data, critical_messages = parse_pw_text_output(
                pw_out_lines, xml_data, structure_dict, pw_input_dict)

            # Add to pw_out_data the last element of each trajectory_data value,
            # except for some large arrays that are unlikely to ever be queried.
            skip_keys = [
                'forces', 'atomic_magnetic_moments', 'atomic_charges',
                'lattice_vectors_relax', 'atomic_positions_relax',
                'atomic_species_name'
            ]
            tmp_trajectory_data = copy.copy(trajectory_data)
            for x in tmp_trajectory_data.iteritems():
                if x[0] in skip_keys:
                    continue
                pw_out_data[x[0]] = x[1][-1]
                if len(x[1]) == 1:  # drop any keys whose values are not actually arrays
                    trajectory_data.pop(x[0])
            # The k-points array is rather large and unlikely to be queried, since the
            # information is mainly contained in the input file, so move it to the trajectory data
            for key in ['k_points', 'k_points_weights']:
                try:
                    trajectory_data[key] = xml_data.pop(key)
                except KeyError:
                    pass

            key = 'pw_output_image_{}'.format(i + 1)
            image_data[key] = dict(pw_out_data.items() + xml_data.items())

            positions.append([site.position for site in structure_data.sites])
            cells.append(structure_data.cell)

            # If warnings were already present in the NEB output, also add the PW warnings
            # to the neb output data, avoiding repetitions.
            if neb_out_dict['warnings']:
                for warning in pw_out_data['warnings']:
                    if warning not in neb_out_dict['warnings']:
                        neb_out_dict['warnings'].append(warning)

        # Symbols can be obtained simply from the last image
        symbols = [str(site.kind_name) for site in structure_data.sites]

        new_nodes_list = []

        # convert the dictionary into an AiiDA object
        output_params = ParameterData(dict=dict(neb_out_dict.items() +
                                                image_data.items()))

        # return it to the execmanager
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        # convert data on structure of images into a TrajectoryData
        traj = TrajectoryData()
        traj.set_trajectory(
            stepids=numpy.arange(1, num_images + 1),
            cells=numpy.array(cells),
            symbols=numpy.array(symbols),
            positions=numpy.array(positions),
        )

        # return it to the execmanager
        new_nodes_list.append((self.get_linkname_outtrajectory(), traj))

        if parser_opts.get('all_iterations', False):
            if iteration_data:
                from aiida.orm.data.array import ArrayData

                arraydata = ArrayData()
                for x in iteration_data.iteritems():
                    arraydata.set_array(x[0], numpy.array(x[1]))
                new_nodes_list.append(
                    (self.get_linkname_iterationarray(), arraydata))

        # Load the original and interpolated energy profile along the minimum-energy path (mep)
        try:
            mep_file = os.path.join(out_folder.get_abs_path('.'),
                                    self._calc._PREFIX + '.dat')
            mep = numpy.loadtxt(mep_file)
        except Exception:
            self.logger.warning(
                "Impossible to find the file with image energies "
                "versus reaction coordinate.")
            mep = numpy.array([[]])

        try:
            interp_mep_file = os.path.join(out_folder.get_abs_path('.'),
                                           self._calc._PREFIX + '.int')
            interp_mep = numpy.loadtxt(interp_mep_file)
        except Exception:
            self.logger.warning(
                "Impossible to find the file with the interpolation "
                "of image energies versus reaction coordinate.")
            interp_mep = numpy.array([[]])
        # Create an ArrayData with the energy profiles
        mep_arraydata = ArrayData()
        mep_arraydata.set_array('mep', mep)
        mep_arraydata.set_array('interpolated_mep', interp_mep)
        new_nodes_list.append((self.get_linkname_meparray(), mep_arraydata))

        return successful, new_nodes_list
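
The last part of this example loads the prefix.dat and prefix.int files written by neb.x, which hold the energy profile along the minimum-energy path, one row per image (or interpolation point). A short post-processing sketch follows, assuming the usual layout with the reaction coordinate in the first column and the energy relative to the first image in the second; the file name is hypothetical.

    import numpy

    # Sketch only: estimate the forward barrier from a neb.x energy-profile file,
    # assuming column 0 = reaction coordinate, column 1 = energy relative to image 1.
    mep = numpy.loadtxt('pwscf.dat')
    if mep.ndim == 2 and mep.size:
        barrier = mep[:, 1].max() - mep[0, 1]
        print("estimated forward barrier: {} (units as written by neb.x)".format(barrier))
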
Example #3
    def parse_with_retrieved(self, retrieved):
        """
        Receives in input a dictionary of retrieved nodes.
        Does all the logic here.
        """
        from aiida.common.exceptions import InvalidOperation
        import os
        import glob

        successful = True

        # check if I'm not to overwrite anything
        #state = self._calc.get_state()
        #if state != calc_states.PARSING:
        #    raise InvalidOperation("Calculation not in {} state"
        #                           .format(calc_states.PARSING) )

        # retrieve the input parameter
        calc_input = self._calc.inp.parameters

        # look for any optional flags passed to the parser
        try:
            parser_opts = self._calc.inp.settings.get_dict()[
                self.get_parser_settings_key()]
        except (AttributeError, KeyError):
            parser_opts = {}

        # load the input dictionary
        # TODO: pass this input_dict to the parser. It might need it.
        input_dict = self._calc.inp.parameters.get_dict()

        # Check that the retrieved folder is there
        try:
            out_folder = retrieved[self._calc._get_linkname_retrieved()]
        except KeyError:
            self.logger.error("No retrieved folder found")
            return False, ()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()
        # at least the stdout should exist
        if not self._calc._OUTPUT_FILE_NAME in list_of_files:
            self.logger.error("Standard output not found")
            successful = False
            return successful, ()
        # if there is anything more, note it down so as to call the raw parser
        # with the right options
        # look for xml
        has_xml = False
        if self._calc._DATAFILE_XML_BASENAME in list_of_files:
            has_xml = True
        # look for bands
        has_bands = False
        if glob.glob(os.path.join(out_folder.get_abs_path('.'), 'K*[0-9]')):
            # Note: assuming format of kpoints subfolder is K*[0-9]
            has_bands = True
            # TODO: maybe it can be more general than bands only?
        out_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._OUTPUT_FILE_NAME)
        xml_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._DATAFILE_XML_BASENAME)
        dir_with_bands = out_folder.get_abs_path('.')

        # call the raw parsing function
        parsing_args = [out_file, input_dict, parser_opts]
        if has_xml:
            parsing_args.append(xml_file)
        if has_bands:
            if not has_xml:
                self.logger.warning("Cannot parse bands if xml file not "
                                    "found")
            else:
                parsing_args.append(dir_with_bands)

        out_dict, trajectory_data, structure_data, raw_successful = parse_raw_output(
            *parsing_args)

        # if calculation was not considered failed already, use the new value
        successful = raw_successful if successful else successful

        new_nodes_list = []

        # save the new structure, if any; structure_data is not needed after this
        in_struc = self._calc.get_inputs_dict()['structure']
        type_calc = input_dict['CONTROL']['calculation']
        struc = in_struc
        if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
            if 'cell' in structure_data.keys():
                struc = convert_qe2aiida_structure(structure_data,
                                                   input_structure=in_struc)
                new_nodes_list.append(
                    (self.get_linkname_outstructure(), struc))

        k_points_list = trajectory_data.pop('k_points', None)
        k_points_weights_list = trajectory_data.pop('k_points_weights', None)

        if k_points_list is not None:

            # build the kpoints object
            if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
                raise QEOutputParsingError(
                    'Error in kpoints units (should be cartesian)')
            # build a KpointsData object from the output k-points

            kpoints_from_output = KpointsData()
            kpoints_from_output.set_cell_from_structure(struc)
            kpoints_from_output.set_kpoints(k_points_list,
                                            cartesian=True,
                                            weights=k_points_weights_list)
            kpoints_from_input = self._calc.inp.kpoints
            try:
                kpoints_from_input.get_kpoints()
            except AttributeError:
                new_nodes_list += [(self.get_linkname_out_kpoints(),
                                    kpoints_from_output)]

        # convert the dictionary into an AiiDA object
        output_params = ParameterData(dict=out_dict)
        # return it to the execmanager
        new_nodes_list.append((self.get_linkname_outparams(), output_params))

        if trajectory_data:
            import numpy
            from aiida.orm.data.array.trajectory import TrajectoryData
            from aiida.orm.data.array import ArrayData

            try:
                positions = numpy.array(
                    trajectory_data.pop('atomic_positions_relax'))
                try:
                    cells = numpy.array(
                        trajectory_data.pop('lattice_vectors_relax'))
                    # if KeyError, the MD was at fixed cell
                except KeyError:
                    cells = numpy.array([in_struc.cell] * len(positions))

                symbols = numpy.array(
                    [str(i.kind_name) for i in in_struc.sites])
                stepids = numpy.arange(
                    len(positions))  # a growing integer per step
                # I will insert time parsing when they fix their issues about time
                # printing (logic is broken if restart is on)

                traj = TrajectoryData()
                traj.set_trajectory(
                    stepids=stepids,
                    cells=cells,
                    symbols=symbols,
                    positions=positions,
                )
                for x in trajectory_data.iteritems():
                    traj.set_array(x[0], numpy.array(x[1]))
                # return it to the execmanager
                new_nodes_list.append(
                    (self.get_linkname_outtrajectory(), traj))

            except KeyError:  # forces in scf calculation (when printed in the output)
                arraydata = ArrayData()
                for x in trajectory_data.iteritems():
                    arraydata.set_array(x[0], numpy.array(x[1]))
                # return it to the execmanager
                new_nodes_list.append(
                    (self.get_linkname_outarray(), arraydata))

        return successful, new_nodes_list
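
Both the relax/MD branch above and the corresponding branch in the first example feed plain numpy arrays into TrajectoryData.set_trajectory. As used in these examples, the arrays have the shapes: stepids (n_steps,), cells (n_steps, 3, 3), symbols (n_sites,) and positions (n_steps, n_sites, 3). Below is a self-contained sketch with made-up numbers; the set_trajectory call itself is left commented out since it needs an AiiDA 0.x installation.

    import numpy

    n_steps, n_sites = 3, 2
    stepids = numpy.arange(n_steps)                       # shape (n_steps,)
    cells = numpy.array([numpy.eye(3) * 5.0] * n_steps)   # shape (n_steps, 3, 3)
    symbols = numpy.array(['Si', 'Si'])                   # shape (n_sites,)
    positions = numpy.zeros((n_steps, n_sites, 3))        # shape (n_steps, n_sites, 3)

    # from aiida.orm.data.array.trajectory import TrajectoryData
    # traj = TrajectoryData()
    # traj.set_trajectory(stepids=stepids, cells=cells,
    #                     symbols=symbols, positions=positions)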