def parse_with_retrieved(self, retrieved):
    """
    Parse the retrieved files of the calculation and build the output nodes.

    :param retrieved: a dictionary of retrieved nodes; the retrieved folder
        is looked up under the key ``self._calc._get_linkname_retrieved()``.
    :return: a tuple ``(successful, new_nodes_list)`` where ``successful`` is
        the success flag reported by the raw parser and ``new_nodes_list`` is
        a list of ``(link name, node)`` tuples. When the retrieved folder or
        the standard output file is missing, ``(False, ())`` is returned.
    :raise QEOutputParsingError: if k-points were parsed but their units are
        not '2 pi / Angstrom'.
    """
    import glob
    import os

    # look for eventual flags of the parser
    try:
        parser_opts = self._calc.inp.settings.get_dict()[
            self.get_parser_settings_key()]
    except (AttributeError, KeyError):
        parser_opts = {}

    # load the input dictionary
    input_dict = self._calc.inp.parameters.get_dict()

    # Check that the retrieved folder is there
    try:
        out_folder = retrieved[self._calc._get_linkname_retrieved()]
    except KeyError:
        self.logger.error("No retrieved folder found")
        return False, ()

    # check what is inside the folder: at least the stdout should exist
    list_of_files = out_folder.get_folder_list()
    if self._calc._OUTPUT_FILE_NAME not in list_of_files:
        self.logger.error("Standard output not found")
        return False, ()

    # if there is something more, note it down so that the raw parser is
    # called with the right options
    folder_path = out_folder.get_abs_path('.')
    has_xml = self._calc._DATAFILE_XML_BASENAME in list_of_files
    # Note: assuming format of kpoints subfolder is K*[0-9]
    # TODO: maybe it can be more general than bands only?
    has_bands = bool(glob.glob(os.path.join(folder_path, 'K*[0-9]')))

    out_file = os.path.join(folder_path, self._calc._OUTPUT_FILE_NAME)
    xml_file = os.path.join(folder_path, self._calc._DATAFILE_XML_BASENAME)
    dir_with_bands = folder_path

    # the raw parser takes the xml file and the bands directory as optional
    # trailing positional arguments, so they are appended only when present
    parsing_args = [out_file, input_dict, parser_opts]
    if has_xml:
        parsing_args.append(xml_file)
    if has_bands:
        if not has_xml:
            self.logger.warning("Cannot parse bands if xml file not "
                                "found")
        else:
            parsing_args.append(dir_with_bands)

    out_dict, trajectory_data, structure_data, raw_successful = parse_raw_output(
        *parsing_args)

    # every failure detected before this point returned early, so the final
    # flag is simply the one reported by the raw parser
    successful = raw_successful

    new_nodes_list = []

    # I eventually save the new structure. structure_data is unnecessary
    # after this
    in_struc = self._calc.get_inputs_dict()['structure']
    type_calc = input_dict['CONTROL']['calculation']
    struc = in_struc
    if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
        if 'cell' in structure_data:
            struc = convert_qe2aiida_structure(structure_data,
                                               input_structure=in_struc)
            new_nodes_list.append((self.get_linkname_outstructure(), struc))

    k_points_list = trajectory_data.pop('k_points', None)
    k_points_weights_list = trajectory_data.pop('k_points_weights', None)

    if k_points_list is not None:
        # build the kpoints object
        if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
            raise QEOutputParsingError(
                'Error in kpoints units (should be cartesian)')

        kpoints_from_output = KpointsData()
        kpoints_from_output.set_cell_from_structure(struc)
        kpoints_from_output.set_kpoints(k_points_list, cartesian=True,
                                        weights=k_points_weights_list)
        kpoints_from_input = self._calc.inp.kpoints
        # the parsed k-points are only attached when the input node cannot
        # provide an explicit list itself (get_kpoints raises AttributeError)
        try:
            kpoints_from_input.get_kpoints()
        except AttributeError:
            new_nodes_list.append(
                (self.get_linkname_out_kpoints(), kpoints_from_output))

    # convert the dictionary into an AiiDA object and return it to the
    # execmanager
    output_params = ParameterData(dict=out_dict)
    new_nodes_list.append((self.get_linkname_outparams(), output_params))

    if trajectory_data:
        import numpy
        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.orm.data.array import ArrayData

        # Only the presence of 'atomic_positions_relax' decides between a
        # trajectory and a plain array; testing the key explicitly keeps an
        # unrelated KeyError raised further down from being silently
        # mistaken for the "no positions" case.
        if 'atomic_positions_relax' in trajectory_data:
            positions = numpy.array(
                trajectory_data.pop('atomic_positions_relax'))
            try:
                cells = numpy.array(
                    trajectory_data.pop('lattice_vectors_relax'))
            except KeyError:
                # if KeyError, the MD was at fixed cell
                cells = numpy.array([in_struc.cell] * len(positions))

            symbols = numpy.array(
                [str(site.kind_name) for site in in_struc.sites])
            stepids = numpy.arange(len(positions))  # a growing integer per step
            # I will insert time parsing when they fix their issues about
            # time printing (logic is broken if restart is on)

            traj = TrajectoryData()
            traj.set_trajectory(
                stepids=stepids,
                cells=cells,
                symbols=symbols,
                positions=positions,
            )
            for name, values in trajectory_data.items():
                traj.set_array(name, numpy.array(values))
            new_nodes_list.append((self.get_linkname_outtrajectory(), traj))
        else:
            # forces in scf calculation (when outputed)
            arraydata = ArrayData()
            for name, values in trajectory_data.items():
                arraydata.set_array(name, numpy.array(values))
            new_nodes_list.append((self.get_linkname_outarray(), arraydata))

    return successful, new_nodes_list
def parse_with_retrieved(self, retrieved):
    """
    Parse the output nodes for a PwCalculations from a dictionary of retrieved nodes.

    Two nodes that are expected are the default 'retrieved' FolderData node which will
    store the retrieved files permanently in the repository. The second required node
    is the unstored FolderData node with the temporary retrieved files, which should
    be passed under the key 'retrieved_temporary_folder_key' of the Parser class.

    :param retrieved: a dictionary of retrieved nodes
    :return: a tuple ``(successful, new_nodes_list)`` with a success flag and a list
        of ``(link name, node)`` tuples; ``(False, ())`` is returned when the retrieved
        folder, the required temporary folder or the standard output file is missing
    :raise QEOutputParsingError: if k-points were parsed but their units are not
        '2 pi / Angstrom'
    """
    import os
    import numpy

    successful = True

    # Load the input dictionary
    parameters = self._calc.inp.parameters.get_dict()

    # Look for optional settings input node and potential 'parser_options' dictionary within it
    try:
        parser_opts = self._calc.inp.settings.get_dict()[self.get_parser_settings_key()]
    except (AttributeError, KeyError):
        parser_opts = {}

    # Check that the retrieved folder is there
    try:
        out_folder = retrieved[self._calc._get_linkname_retrieved()]
    except KeyError:
        self.logger.error("No retrieved folder found")
        return False, ()

    # Verify that the retrieved_temporary_folder is within the arguments if temporary files were specified
    dir_with_bands = None
    if self._calc._get_retrieve_temporary_list():
        try:
            temporary_folder = retrieved[self.retrieved_temporary_folder_key]
        except KeyError:
            self.logger.error('the {} was not passed as an argument'.format(self.retrieved_temporary_folder_key))
            return False, ()
        dir_with_bands = temporary_folder.get_abs_path('.')

    list_of_files = out_folder.get_folder_list()

    # The stdout is required for parsing
    if self._calc._OUTPUT_FILE_NAME not in list_of_files:
        self.logger.error("The standard output file '{}' was not found but is required".format(
            self._calc._OUTPUT_FILE_NAME))
        return False, ()

    # The xml file is required for parsing, but its absence only marks the run as failed:
    # the raw parser is still called with xml_file=None
    if self._calc._DATAFILE_XML_BASENAME not in list_of_files:
        self.logger.error("The xml output file '{}' was not found but is required".format(
            self._calc._DATAFILE_XML_BASENAME))
        successful = False
        xml_file = None
    else:
        xml_file = os.path.join(out_folder.get_abs_path('.'), self._calc._DATAFILE_XML_BASENAME)

    out_file = os.path.join(out_folder.get_abs_path('.'), self._calc._OUTPUT_FILE_NAME)

    # Call the raw parsing function
    parsing_args = [out_file, parameters, parser_opts, xml_file, dir_with_bands]
    out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(*parsing_args)

    # If calculation was not considered failed already, use the new value
    successful = successful and raw_successful

    # The symmetry info has large arrays, that occupy most of the database.
    # It turns out most of this is due to 64 matrices that are repeated over and over again,
    # therefore each parsed symmetry is replaced by its index in self._possible_symmetries.
    # If the parser_opts has a key all_symmetries set to True, nothing is reduced.
    if not parser_opts.get('all_symmetries', False):
        try:
            if 'symmetries' in out_dict:
                new_symmetries = []
                for this_sym in out_dict['symmetries']:
                    name = this_sym['name']
                    index = None
                    # no break on purpose: the last matching entry wins
                    for i, this in enumerate(self._possible_symmetries):
                        if name in this['name']:
                            index = i
                    if index is None:
                        self.logger.error("Symmetry {} not found".format(name))
                    # note: here I lose the information about equivalent
                    # ions and fractional_translation.
                    # They will be present with all_symmetries=True
                    new_symmetries.append({
                        't_rev': this_sym['t_rev'],
                        'symmetry_number': index,
                    })
                out_dict['symmetries'] = new_symmetries  # and overwrite the old one
        except KeyError:  # no symmetries were parsed (failed case, likely)
            self.logger.error("No symmetries were found in output")

    new_nodes_list = []

    # I eventually save the new structure. structure_data is unnecessary after this
    in_struc = self._calc.get_inputs_dict()['structure']
    type_calc = parameters['CONTROL']['calculation']
    struc = in_struc
    if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
        if 'cell' in structure_data:
            struc = convert_qe2aiida_structure(structure_data, input_structure=in_struc)
            new_nodes_list.append((self.get_linkname_outstructure(), struc))

    k_points_list = trajectory_data.pop('k_points', None)
    k_points_weights_list = trajectory_data.pop('k_points_weights', None)

    if k_points_list is not None:

        # Build the kpoints object
        if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
            raise QEOutputParsingError('Error in kpoints units (should be cartesian)')

        kpoints_from_output = KpointsData()
        kpoints_from_output.set_cell_from_structure(struc)
        kpoints_from_output.set_kpoints(k_points_list, cartesian=True, weights=k_points_weights_list)
        kpoints_from_input = self._calc.inp.kpoints

        # The bands need the k-points objects built just above, so they are handled inside this branch
        if bands_data:
            # Converting bands into a BandsData object (including the kpoints)
            kpoints_for_bands = kpoints_from_output

            try:
                kpoints_from_input.get_kpoints()
                kpoints_for_bands.labels = kpoints_from_input.labels
            except (AttributeError, ValueError, TypeError):
                # AttributeError: no list of kpoints in input
                # ValueError: labels from input do not match the output
                #      list of kpoints (some kpoints are missing)
                # TypeError: labels are not set, so kpoints_from_input.labels=None
                pass

            # Get the bands occupations and correct the occupations of QE:
            # If it computes only one component, it occupies it with half number of electrons
            try:
                bands_data['occupations'][1]
                the_occupations = bands_data['occupations']
            except IndexError:
                the_occupations = 2. * numpy.array(bands_data['occupations'][0])

            try:
                bands_data['bands'][1]
                bands_energies = bands_data['bands']
            except IndexError:
                bands_energies = bands_data['bands'][0]

            the_bands_data = BandsData()
            the_bands_data.set_kpointsdata(kpoints_for_bands)
            the_bands_data.set_bands(bands_energies,
                                     units=bands_data['bands_units'],
                                     occupations=the_occupations)

            new_nodes_list.append(('output_band', the_bands_data))
            out_dict['linknames_band'] = ['output_band']
        else:
            # Without bands, the parsed k-points are only attached when the input node
            # cannot provide an explicit list itself (get_kpoints raises AttributeError)
            try:
                kpoints_from_input.get_kpoints()
            except AttributeError:
                new_nodes_list.append((self.get_linkname_out_kpoints(), kpoints_from_output))

    # Separate the atomic_occupations dictionary in its own node if it is present
    atomic_occupations = out_dict.get('atomic_occupations', {})
    if atomic_occupations:
        out_dict.pop('atomic_occupations')
        atomic_occupations_node = ParameterData(dict=atomic_occupations)
        new_nodes_list.append(('output_atomic_occupations', atomic_occupations_node))

    output_params = ParameterData(dict=out_dict)
    new_nodes_list.append((self.get_linkname_outparams(), output_params))

    if trajectory_data:
        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.orm.data.array import ArrayData

        # Only the presence of 'atomic_positions_relax' decides between a trajectory and a
        # plain array; testing the key explicitly keeps an unrelated KeyError raised further
        # down from being silently mistaken for the "no positions" case.
        if 'atomic_positions_relax' in trajectory_data:
            positions = numpy.array(trajectory_data.pop('atomic_positions_relax'))
            try:
                cells = numpy.array(trajectory_data.pop('lattice_vectors_relax'))
            except KeyError:
                # if KeyError, the MD was at fixed cell
                cells = numpy.array([in_struc.cell] * len(positions))

            symbols = numpy.array([str(site.kind_name) for site in in_struc.sites])
            stepids = numpy.arange(len(positions))  # a growing integer per step
            # I will insert time parsing when they fix their issues about time
            # printing (logic is broken if restart is on)

            traj = TrajectoryData()
            traj.set_trajectory(
                stepids=stepids,
                cells=cells,
                symbols=symbols,
                positions=positions,
            )
            for name, values in trajectory_data.items():
                traj.set_array(name, numpy.array(values))
            new_nodes_list.append((self.get_linkname_outtrajectory(), traj))
        else:
            # forces, atomic charges and atomic mag. moments, in scf calculation (when outputed)
            arraydata = ArrayData()
            for name, values in trajectory_data.items():
                arraydata.set_array(name, numpy.array(values))
            new_nodes_list.append((self.get_linkname_outarray(), arraydata))

    return successful, new_nodes_list
def parse_with_retrieved(self, retrieved):
    """
    Parse the retrieved files of the calculation and build the output nodes.

    :param retrieved: a dictionary of retrieved nodes; the retrieved folder
        is looked up under the key ``self._calc._get_linkname_retrieved()``.
    :return: a tuple ``(successful, new_nodes_list)`` where ``successful`` is
        the success flag reported by the raw parser and ``new_nodes_list`` is
        a list of ``(link name, node)`` tuples. When the retrieved folder or
        the standard output file is missing, ``(False, ())`` is returned.
    :raise QEOutputParsingError: if k-points were parsed but their units are
        not '2 pi / Angstrom'.
    """
    import glob
    import os

    import numpy

    # look for eventual flags of the parser
    try:
        parser_opts = self._calc.inp.settings.get_dict()[
            self.get_parser_settings_key()]
    except (AttributeError, KeyError):
        parser_opts = {}

    # load the input dictionary
    input_dict = self._calc.inp.parameters.get_dict()

    # Check that the retrieved folder is there
    try:
        out_folder = retrieved[self._calc._get_linkname_retrieved()]
    except KeyError:
        self.logger.error("No retrieved folder found")
        return False, ()

    # check what is inside the folder: at least the stdout should exist
    list_of_files = out_folder.get_folder_list()
    if self._calc._OUTPUT_FILE_NAME not in list_of_files:
        self.logger.error("Standard output not found")
        return False, ()

    # if there is something more, note it down so that the raw parser is
    # called with the right options
    folder_path = out_folder.get_abs_path('.')
    has_xml = self._calc._DATAFILE_XML_BASENAME in list_of_files
    # Note: assuming format of kpoints subfolder is K*[0-9]
    # TODO: maybe it can be more general than bands only?
    has_bands = bool(glob.glob(os.path.join(folder_path, 'K*[0-9]')))

    out_file = os.path.join(folder_path, self._calc._OUTPUT_FILE_NAME)
    xml_file = os.path.join(folder_path, self._calc._DATAFILE_XML_BASENAME)
    dir_with_bands = folder_path

    # the raw parser takes the xml file and the bands directory as optional
    # trailing positional arguments, so they are appended only when present
    parsing_args = [out_file, input_dict, parser_opts]
    if has_xml:
        parsing_args.append(xml_file)
    if has_bands:
        if not has_xml:
            self.logger.warning("Cannot parse bands if xml file not "
                                "found")
        else:
            parsing_args.append(dir_with_bands)

    out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(
        *parsing_args)

    # every failure detected before this point returned early, so the final
    # flag is simply the one reported by the raw parser
    successful = raw_successful

    # The symmetry info has large arrays, that occupy most of the database.
    # It turns out most of this is due to 64 matrices that are repeated over
    # and over again, therefore each parsed symmetry is replaced by its index
    # in self._possible_symmetries. If the parser_opts has a key
    # all_symmetries set to True, nothing is reduced.
    if not parser_opts.get('all_symmetries', False):
        try:
            if 'symmetries' in out_dict:
                new_symmetries = []
                for this_sym in out_dict['symmetries']:
                    name = this_sym['name']
                    index = None
                    # no break on purpose: the last matching entry wins
                    for i, this in enumerate(self._possible_symmetries):
                        if name in this['name']:
                            index = i
                    if index is None:
                        self.logger.error(
                            "Symmetry {} not found".format(name))
                    # note: here I lose the information about equivalent
                    # ions and fractional_translation.
                    # They will be present with all_symmetries=True
                    new_symmetries.append({
                        't_rev': this_sym['t_rev'],
                        'symmetry_number': index,
                    })
                out_dict['symmetries'] = new_symmetries  # and overwrite the old one
        except KeyError:  # no symmetries were parsed (failed case, likely)
            self.logger.error("No symmetries were found in output")

    new_nodes_list = []

    # I eventually save the new structure. structure_data is unnecessary
    # after this
    in_struc = self._calc.get_inputs_dict()['structure']
    type_calc = input_dict['CONTROL']['calculation']
    struc = in_struc
    if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
        if 'cell' in structure_data:
            struc = convert_qe2aiida_structure(structure_data,
                                               input_structure=in_struc)
            new_nodes_list.append((self.get_linkname_outstructure(), struc))

    k_points_list = trajectory_data.pop('k_points', None)
    k_points_weights_list = trajectory_data.pop('k_points_weights', None)

    if k_points_list is not None:
        # build the kpoints object
        if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
            raise QEOutputParsingError(
                'Error in kpoints units (should be cartesian)')

        kpoints_from_output = KpointsData()
        kpoints_from_output.set_cell_from_structure(struc)
        kpoints_from_output.set_kpoints(k_points_list, cartesian=True,
                                        weights=k_points_weights_list)
        kpoints_from_input = self._calc.inp.kpoints

        # The bands need the k-points objects built just above, so they are
        # handled inside this branch
        if bands_data:
            # converting bands into a BandsData object (including the kpoints)
            kpoints_for_bands = kpoints_from_output

            try:
                kpoints_from_input.get_kpoints()
                kpoints_for_bands.labels = kpoints_from_input.labels
            except (AttributeError, ValueError, TypeError):
                # AttributeError: no list of kpoints in input
                # ValueError: labels from input do not match the output
                #      list of kpoints (some kpoints are missing)
                # TypeError: labels are not set, so kpoints_from_input.labels=None
                pass

            # get the bands occupations.
            # correct the occupations of QE: if it computes only one
            # component, it occupies it with half number of electrons
            try:
                bands_data['occupations'][1]
                the_occupations = bands_data['occupations']
            except IndexError:
                the_occupations = 2. * numpy.array(
                    bands_data['occupations'][0])

            try:
                bands_data['bands'][1]
                bands_energies = bands_data['bands']
            except IndexError:
                bands_energies = bands_data['bands'][0]

            the_bands_data = BandsData()
            the_bands_data.set_kpointsdata(kpoints_for_bands)
            the_bands_data.set_bands(bands_energies,
                                     units=bands_data['bands_units'],
                                     occupations=the_occupations)

            new_nodes_list.append(('output_band', the_bands_data))
            out_dict['linknames_band'] = ['output_band']
        else:
            # Without bands, the parsed k-points are only attached when the
            # input node cannot provide an explicit list itself
            # (get_kpoints raises AttributeError)
            try:
                kpoints_from_input.get_kpoints()
            except AttributeError:
                new_nodes_list.append(
                    (self.get_linkname_out_kpoints(), kpoints_from_output))

    # convert the dictionary into an AiiDA object and return it to the
    # execmanager
    output_params = ParameterData(dict=out_dict)
    new_nodes_list.append((self.get_linkname_outparams(), output_params))

    if trajectory_data:
        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.orm.data.array import ArrayData

        # Only the presence of 'atomic_positions_relax' decides between a
        # trajectory and a plain array; testing the key explicitly keeps an
        # unrelated KeyError raised further down from being silently
        # mistaken for the "no positions" case.
        if 'atomic_positions_relax' in trajectory_data:
            positions = numpy.array(
                trajectory_data.pop('atomic_positions_relax'))
            try:
                cells = numpy.array(
                    trajectory_data.pop('lattice_vectors_relax'))
            except KeyError:
                # if KeyError, the MD was at fixed cell
                cells = numpy.array([in_struc.cell] * len(positions))

            symbols = numpy.array(
                [str(site.kind_name) for site in in_struc.sites])
            stepids = numpy.arange(len(positions))  # a growing integer per step
            # I will insert time parsing when they fix their issues about
            # time printing (logic is broken if restart is on)

            traj = TrajectoryData()
            traj.set_trajectory(
                stepids=stepids,
                cells=cells,
                symbols=symbols,
                positions=positions,
            )
            for name, values in trajectory_data.items():
                traj.set_array(name, numpy.array(values))
            new_nodes_list.append((self.get_linkname_outtrajectory(), traj))
        else:
            # forces, atomic charges and atomic mag. moments, in scf
            # calculation (when outputed)
            arraydata = ArrayData()
            for name, values in trajectory_data.items():
                arraydata.set_array(name, numpy.array(values))
            new_nodes_list.append((self.get_linkname_outarray(), arraydata))

    return successful, new_nodes_list
def parse_with_retrieved(self, retrieved):
    """
    Parses the calculation-output datafolder, and stores results.

    :param retrieved: a dictionary of retrieved nodes, where the keys
        are the link names of retrieved nodes, and the values are the
        nodes.
    :return: a tuple ``(successful, new_nodes_list)`` where ``successful`` is
        the success flag reported by the NEB raw parser and
        ``new_nodes_list`` is a list of ``(link name, node)`` tuples.
        ``(False, ())`` is returned when the retrieved folder, the standard
        output, the number of images or any per-image file is missing.
    """
    import os

    import numpy

    # look for eventual flags of the parser
    try:
        parser_opts = self._calc.inp.settings.get_dict()[
            self.get_parser_settings_key()]
    except (AttributeError, KeyError):
        parser_opts = {}

    # load the pw input dictionary
    pw_input_dict = self._calc.inp.pw_parameters.get_dict()
    # load the neb input dictionary
    neb_input_dict = self._calc.inp.neb_parameters.get_dict()

    # Check that the retrieved folder is there
    try:
        out_folder = retrieved[self._calc._get_linkname_retrieved()]
    except KeyError:
        self.logger.error("No retrieved folder found")
        return False, ()

    # check what is inside the folder: at least the stdout should exist
    list_of_files = out_folder.get_folder_list()
    if self._calc._OUTPUT_FILE_NAME not in list_of_files:
        self.logger.error("Standard output not found")
        return False, ()

    folder_path = out_folder.get_abs_path('.')
    out_file = os.path.join(folder_path, self._calc._OUTPUT_FILE_NAME)

    # First parse the Neb output
    neb_out_dict, iteration_data, raw_successful = parse_raw_output_neb(
        out_file, neb_input_dict)

    # every failure detected before this point returned early, so the final
    # flag is simply the one reported by the raw parser
    successful = raw_successful

    # Retrieve the number of images: the input takes precedence over the
    # value found in the output
    try:
        num_images = neb_input_dict['num_of_images']
    except KeyError:
        try:
            num_images = neb_out_dict['num_of_images']
        except KeyError:
            self.logger.error(
                "Impossible to understand the number of images")
            return False, ()

    # The symbols are taken from the structure of the last parsed image, so
    # at least one image must be parsed; fail cleanly instead of hitting an
    # undefined variable after an empty loop.
    if num_images < 1:
        self.logger.error("Too few images: {}".format(num_images))
        return False, ()

    # Large per-step arrays that are not copied into the queryable output
    skip_keys = (
        'forces', 'atomic_magnetic_moments', 'atomic_charges',
        'lattice_vectors_relax', 'atomic_positions_relax',
        'atomic_species_name',
    )

    # Now parse the information from the single pw calculations for the
    # different images
    image_data = {}
    positions = []
    cells = []
    for i in range(num_images):
        image_dir = self._calc._PREFIX + '_{}'.format(i + 1)

        # look for xml and parse
        xml_file = os.path.join(folder_path,
                                image_dir,
                                self._calc._PREFIX + '.save',
                                self._calc._DATAFILE_XML_BASENAME)
        try:
            with open(xml_file, 'r') as handle:
                xml_lines = handle.read()  # Note: read() and not readlines()
        except IOError:
            self.logger.error(
                "No xml file found for image {} at {}".format(
                    i + 1, xml_file))
            return False, ()
        xml_data, structure_dict, bands_data = parse_pw_xml_output(
            xml_lines)

        # convert the dictionary obtained from parsing the xml to an AiiDA
        # StructureData
        structure_data = convert_qe2aiida_structure(structure_dict)

        # look for pw output and parse it
        pw_out_file = os.path.join(folder_path, image_dir, 'PW.out')
        try:
            with open(pw_out_file, 'r') as handle:
                pw_out_lines = handle.read()  # Note: read() and not readlines()
        except IOError:
            self.logger.error(
                "No pw output file found for image {}".format(i + 1))
            return False, ()
        pw_out_data, trajectory_data, critical_messages = parse_pw_text_output(
            pw_out_lines, xml_data, structure_dict, pw_input_dict)

        # I add in the out_data all the last elements of trajectory_data
        # values, save for some large arrays that I will likely never query.
        # Iterate over a snapshot because entries are removed on the way.
        for name, values in list(trajectory_data.items()):
            if name in skip_keys:
                continue
            pw_out_data[name] = values[-1]
            if len(values) == 1:  # delete eventual keys that are not arrays
                trajectory_data.pop(name)

        # As the k points are an array that is rather large, and again it's
        # not something I'm going to parse likely since it's an info mainly
        # contained in the input file, I move it to the trajectory data
        for name in ('k_points', 'k_points_weights'):
            try:
                trajectory_data[name] = xml_data.pop(name)
            except KeyError:
                pass

        # merge stdout and xml results; on duplicate keys the xml value wins
        merged_image = dict(pw_out_data)
        merged_image.update(xml_data)
        image_data['pw_output_image_{}'.format(i + 1)] = merged_image

        positions.append([site.position for site in structure_data.sites])
        cells.append(structure_data.cell)

        # If a warning was already present in the NEB, add also PW warnings
        # to the neb output data, avoiding repetitions.
        if neb_out_dict['warnings']:
            for warning in pw_out_data['warnings']:
                if warning not in neb_out_dict['warnings']:
                    neb_out_dict['warnings'].append(warning)

    # Symbols can be obtained simply from the last image
    symbols = [str(site.kind_name) for site in structure_data.sites]

    # Imported here so that the early-exit paths above do not need the ORM
    # data classes
    from aiida.orm.data.array.trajectory import TrajectoryData
    from aiida.orm.data.array import ArrayData

    new_nodes_list = []

    # convert the dictionary into an AiiDA object and return it to the
    # execmanager; on duplicate keys the per-image entries win
    all_output = dict(neb_out_dict)
    all_output.update(image_data)
    output_params = ParameterData(dict=all_output)
    new_nodes_list.append((self.get_linkname_outparams(), output_params))

    # convert data on structure of images into a TrajectoryData
    traj = TrajectoryData()
    traj.set_trajectory(
        stepids=numpy.arange(1, num_images + 1),
        cells=numpy.array(cells),
        symbols=numpy.array(symbols),
        positions=numpy.array(positions),
    )
    new_nodes_list.append((self.get_linkname_outtrajectory(), traj))

    if parser_opts.get('all_iterations', False) and iteration_data:
        arraydata = ArrayData()
        for name, values in iteration_data.items():
            arraydata.set_array(name, numpy.array(values))
        new_nodes_list.append(
            (self.get_linkname_iterationarray(), arraydata))

    # Load the original and interpolated energy profile along the
    # minimum-energy path (mep). This is best-effort: any failure only
    # produces a warning and an empty array.
    try:
        mep_file = os.path.join(folder_path, self._calc._PREFIX + '.dat')
        mep = numpy.loadtxt(mep_file)
    except Exception:
        self.logger.warning(
            "Impossible to find the file with image energies "
            "versus reaction coordinate.")
        mep = numpy.array([[]])

    try:
        interp_mep_file = os.path.join(folder_path,
                                       self._calc._PREFIX + '.int')
        interp_mep = numpy.loadtxt(interp_mep_file)
    except Exception:
        self.logger.warning(
            "Impossible to find the file with the interpolation "
            "of image energies versus reaction coordinate.")
        interp_mep = numpy.array([[]])

    # Create an ArrayData with the energy profiles
    mep_arraydata = ArrayData()
    mep_arraydata.set_array('mep', mep)
    mep_arraydata.set_array('interpolated_mep', interp_mep)
    new_nodes_list.append((self.get_linkname_meparray(), mep_arraydata))

    return successful, new_nodes_list
def parse(self, **kwargs):
    """Parse the retrieved files of a completed `NebCalculation` into output nodes.

    Two nodes that are expected are the default 'retrieved' `FolderData` node which will store the retrieved files
    permanently in the repository. The second required node is a filepath under the key `retrieved_temporary_files`
    which should contain the temporary retrieved files.

    On success the outputs `output_parameters`, `output_trajectory`, `output_mep` and, when the parser option
    `all_iterations` is set and iteration data was parsed, `iteration_array` are registered through `self.out`.

    :return: `None` on success, otherwise one of the exit codes of `self.exit_codes` describing the failure
    """
    import os
    import traceback

    import numpy

    PREFIX = self.node.process_class._PREFIX

    # Check that the retrieved folder is there
    try:
        out_folder = self.retrieved
    except NotExistent:
        return self.exit_codes.ERROR_NO_RETRIEVED_FOLDER

    list_of_files = out_folder.list_object_names()  # Note: this includes folders, but not the files they contain.

    # The stdout is required for parsing
    filename_stdout = self.node.get_attribute('output_filename')

    if filename_stdout not in list_of_files:
        self.logger.error("The standard output file '{}' was not found but is required".format(filename_stdout))
        return self.exit_codes.ERROR_READING_OUTPUT_FILE

    # Look for optional settings input node and potential 'parser_options' dictionary within it
    # Note that we look for both NEB and PW parser options under "inputs.settings.parser_options";
    # we don't even have a namespace "inputs.pw.settings".
    try:
        settings = self.node.inputs.settings.get_dict()
        parser_options = settings[self.get_parser_settings_key()]
    except (AttributeError, KeyError, NotExistent):
        parser_options = {}

    # TypeError covers a `parser_options` entry that is present but not a mapping (e.g. None)
    try:
        include_deprecated_v2_keys = parser_options['include_deprecated_v2_keys']
    except (TypeError, KeyError):
        include_deprecated_v2_keys = False

    # load the pw input parameters dictionary
    pw_input_dict = self.node.inputs.pw__parameters.get_dict()

    # load the neb input parameters dictionary
    neb_input_dict = self.node.inputs.parameters.get_dict()

    stdout_abspath = os.path.join(out_folder._repository._get_base_folder().abspath, filename_stdout)

    # First parse the Neb output
    try:
        neb_out_dict, iteration_data, raw_successful = parse_raw_output_neb(stdout_abspath, neb_input_dict)
        # TODO: why do we ignore raw_successful ?
    except QEOutputParsingError as exc:
        self.logger.error('QEOutputParsingError in parse_raw_output_neb: {}'.format(exc))
        return self.exit_codes.ERROR_READING_OUTPUT_FILE

    for warn_type in ['warnings', 'parser_warnings']:
        for message in neb_out_dict[warn_type]:
            self.logger.warning('parsing NEB output: {}'.format(message))

    if 'QE neb run did not reach the end of the execution.' in neb_out_dict['parser_warnings']:
        return self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE

    # Retrieve the number of images: the input takes precedence over the value found in the output
    try:
        num_images = neb_input_dict['num_of_images']
    except KeyError:
        try:
            num_images = neb_out_dict['num_of_images']
        except KeyError:
            self.logger.error('Impossible to understand the number of images')
            return self.exit_codes.ERROR_INVALID_OUTPUT
    if num_images < 2:
        self.logger.error('Too few images: {}'.format(num_images))
        return self.exit_codes.ERROR_INVALID_OUTPUT

    # Now parse the information from the individual pw calculations for the different images
    image_data = {}
    positions = []
    cells = []
    # for each image...
    for i in range(num_images):
        image_folder = '{}_{}'.format(PREFIX, i + 1)

        # check if any of the known XML output file names are present, and parse the first that we find
        relative_output_folder = os.path.join(image_folder, '{}.save'.format(PREFIX))
        retrieved_files = self.retrieved.list_object_names(relative_output_folder)
        for xml_filename in PwCalculation.xml_filenames:
            if xml_filename in retrieved_files:
                xml_file_path = os.path.join(relative_output_folder, xml_filename)
                try:
                    with out_folder.open(xml_file_path) as xml_file:
                        parsed_data_xml, logs_xml = parse_pw_xml(xml_file, None, include_deprecated_v2_keys)
                except IOError:
                    return self.exit_codes.ERROR_OUTPUT_XML_READ
                except XMLParseError:
                    return self.exit_codes.ERROR_OUTPUT_XML_PARSE
                except XMLUnsupportedFormatError:
                    return self.exit_codes.ERROR_OUTPUT_XML_FORMAT
                except Exception:
                    # report through the parser logger, like every other failure path of this method
                    self.logger.error(traceback.format_exc())
                    return self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION
                # this image is dealt with, so break the inner loop and go to the next image
                break
        # otherwise, if none of the filenames we tried exists, exit with an error
        else:
            self.logger.error('No xml output file found for image {}'.format(i + 1))
            return self.exit_codes.ERROR_MISSING_XML_FILE

        # look for pw output and parse it
        pw_out_file = os.path.join(image_folder, 'PW.out')
        try:
            with out_folder.open(pw_out_file, 'r') as handle:
                pw_out_text = handle.read()  # Note: read() and not readlines()
        except IOError:
            self.logger.error('No pw output file found for image {}'.format(i + 1))
            return self.exit_codes.ERROR_READING_OUTPUT_FILE

        try:
            parsed_data_stdout, logs_stdout = parse_pw_stdout(
                pw_out_text, pw_input_dict, parser_options, parsed_data_xml)
        except Exception:
            # report through the parser logger, like every other failure path of this method
            self.logger.error(traceback.format_exc())
            return self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION

        parsed_structure = parsed_data_stdout.pop('structure', {})
        parsed_trajectory = parsed_data_stdout.pop('trajectory', {})
        parsed_parameters = PwParser.build_output_parameters(parsed_data_xml, parsed_data_stdout)

        # Explicit information about k-points does not need to be queryable so we remove it from the parameters
        parsed_parameters.pop('k_points', None)
        parsed_parameters.pop('k_points_units', None)
        parsed_parameters.pop('k_points_weights', None)

        # Delete bands # TODO: this is just to make pytest happy; do we want to keep them instead?
        parsed_parameters.pop('bands', None)

        # Append the last frame of some of the smaller trajectory arrays to the parameters for easy querying
        PwParser.final_trajectory_frame_to_parameters(parsed_parameters, parsed_trajectory)

        # If the parser option 'all_symmetries' is False, we reduce the raw parsed symmetries to save space
        all_symmetries = False if parser_options is None else parser_options.get('all_symmetries', False)
        if not all_symmetries and 'cell' in parsed_structure:
            reduce_symmetries(parsed_parameters, parsed_structure, self.logger)

        structure_data = convert_qe2aiida_structure(parsed_structure)

        key = 'pw_output_image_{}'.format(i + 1)
        image_data[key] = parsed_parameters

        positions.append([site.position for site in structure_data.sites])
        cells.append(structure_data.cell)

        # Add also PW warnings and errors to the neb output data, avoiding repetitions.
        for log_type in ['warning', 'error']:
            for message in logs_stdout[log_type]:
                formatted_message = '{}: {}'.format(log_type, message)
                if formatted_message not in neb_out_dict['warnings']:
                    neb_out_dict['warnings'].append(formatted_message)

    # Symbols can be obtained simply from the last image
    symbols = [str(site.kind_name) for site in structure_data.sites]

    # Imported here so that the early-exit paths above do not need the ORM data classes
    from aiida.orm import TrajectoryData, ArrayData

    # on duplicate keys the per-image entries win over the NEB ones
    all_output = dict(neb_out_dict)
    all_output.update(image_data)
    output_params = Dict(dict=all_output)
    self.out('output_parameters', output_params)

    trajectory = TrajectoryData()
    trajectory.set_trajectory(
        stepids=numpy.arange(1, num_images + 1),
        cells=numpy.array(cells),
        symbols=symbols,
        positions=numpy.array(positions),
    )
    self.out('output_trajectory', trajectory)

    if parser_options is not None and parser_options.get('all_iterations', False):
        if iteration_data:
            arraydata = ArrayData()
            for name, values in iteration_data.items():
                arraydata.set_array(name, numpy.array(values))
            self.out('iteration_array', arraydata)

    # Load the original and interpolated energy profile along the minimum-energy path (mep).
    # This is best-effort: any failure only produces a warning and an empty array.
    filename = PREFIX + '.dat'
    try:
        with out_folder.open(filename, 'r') as handle:
            mep = numpy.loadtxt(handle)
    except Exception:
        self.logger.warning('could not open expected output file `{}`.'.format(filename))
        mep = numpy.array([[]])

    filename = PREFIX + '.int'
    try:
        with out_folder.open(filename, 'r') as handle:
            interp_mep = numpy.loadtxt(handle)
    except Exception:
        self.logger.warning('could not open expected output file `{}`.'.format(filename))
        interp_mep = numpy.array([[]])

    # Create an ArrayData with the energy profiles
    mep_arraydata = ArrayData()
    mep_arraydata.set_array('mep', mep)
    mep_arraydata.set_array('interpolated_mep', interp_mep)
    self.out('output_mep', mep_arraydata)

    return
def parse_with_retrieved(self, retrieved):
    """
    Parse the output nodes for a PwCalculation from a dictionary of retrieved nodes.

    Two nodes are expected: the default 'retrieved' FolderData node, which stores the retrieved files
    permanently in the repository, and (only if the calculation declared a temporary retrieve list) the
    unstored FolderData node with the temporary retrieved files, which should be passed under the key
    'retrieved_temporary_folder_key' of the Parser class.

    :param retrieved: a dictionary of retrieved nodes
    :return: a tuple ``(successful, new_nodes_list)``. ``new_nodes_list`` is a list of
        ``(link name, node)`` tuples; it is the empty tuple ``()`` when parsing is aborted early because a
        required folder or the standard output file is missing.
    :raises QEOutputParsingError: if k-points were parsed but their units are not '2 pi / Angstrom'
    """
    import os

    successful = True

    # Load the input dictionary
    parameters = self._calc.inp.parameters.get_dict()

    # Look for optional settings input node and potential 'parser_options' dictionary within it
    try:
        parser_opts = self._calc.inp.settings.get_dict()[self.get_parser_settings_key()]
    except (AttributeError, KeyError):
        # AttributeError: no settings node in input; KeyError: settings without parser options
        parser_opts = {}

    # Check that the retrieved folder is there
    try:
        out_folder = retrieved[self._calc._get_linkname_retrieved()]
    except KeyError:
        self.logger.error("No retrieved folder found")
        return False, ()

    # Verify that the retrieved_temporary_folder is within the arguments if temporary files were specified
    if self._calc._get_retrieve_temporary_list():
        try:
            temporary_folder = retrieved[self.retrieved_temporary_folder_key]
            dir_with_bands = temporary_folder.get_abs_path('.')
        except KeyError:
            self.logger.error('the {} was not passed as an argument'.format(self.retrieved_temporary_folder_key))
            return False, ()
    else:
        dir_with_bands = None

    list_of_files = out_folder.get_folder_list()

    # The stdout is required for parsing
    if self._calc._OUTPUT_FILE_NAME not in list_of_files:
        self.logger.error("The standard output file '{}' was not found but is required".format(
            self._calc._OUTPUT_FILE_NAME))
        return False, ()

    out_folder_path = out_folder.get_abs_path('.')

    # The xml file is required for parsing, but its absence only marks the calculation as failed: the raw
    # parser is still called so that whatever the stdout contains ends up in the output nodes
    if self._calc._DATAFILE_XML_BASENAME not in list_of_files:
        self.logger.error("The xml output file '{}' was not found but is required".format(
            self._calc._DATAFILE_XML_BASENAME))
        successful = False
        xml_file = None
    else:
        xml_file = os.path.join(out_folder_path, self._calc._DATAFILE_XML_BASENAME)

    out_file = os.path.join(out_folder_path, self._calc._OUTPUT_FILE_NAME)

    # Call the raw parsing function
    out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(
        out_file, parameters, parser_opts, xml_file, dir_with_bands)

    # If calculation was not considered failed already, use the new value
    successful = successful and raw_successful

    # If the parser option 'all_symmetries' is not set to True, we reduce the raw parsed symmetries to save
    # space. The reduction needs the parsed cell: when no cell was parsed (e.g. the XML file is missing) the
    # symmetries are left untouched instead of failing with a KeyError.
    all_symmetries = parser_opts.get('all_symmetries', False)
    if not all_symmetries and 'cell' in structure_data:

        # In the standard output, each symmetry operation prints two rotation matrices:
        #
        # * S_cryst^T: matrix in crystal coordinates, transposed
        # * S_cart: matrix in cartesian coordinates,
        #
        # The XML files only print one matrix:
        #
        # * S_cryst: matrix in crystal coordinates
        #
        # The raw parsed symmetry information from the XML is large and will load the database heavily if
        # stored as is for each calculation. Instead, we will map these dictionaries onto a static dictionary
        # of rotation matrices generated by the _get_qe_symmetry_list static method. This dictionary will
        # return the rotation matrices in cartesian coordinates, i.e. S_cart. In order to compare the raw
        # matrices from the XML to these static matrices we have to convert S_cryst into S_cart. We derive
        # here how that is done:
        #
        #   S_cryst * v_cryst = v_cryst'
        #
        # where v_cryst' is the rotated vector v_cryst under S_cryst
        # We define `cell` where cell vectors are rows. Converting a vector from crystal to cartesian
        # coordinates is defined as:
        #
        #   cell^T * v_cryst = v_cart
        #
        # The inverse of this operation is defined as
        #
        #   v_cryst = cell^Tinv * v_cart
        #
        # Replacing the last equation into the first we find:
        #
        #   S_cryst * cell^Tinv * v_cart = cell^Tinv * v_cart'
        #
        # Multiply on the left with cell^T gives:
        #
        #   cell^T * S_cryst * cell^Tinv * v_cart = v_cart'
        #
        # which can be rewritten as:
        #
        #   S_cart * v_cart = v_cart'
        #
        # where:
        #
        #   S_cart = cell^T * S_cryst * cell^Tinv
        #
        # We compute here the transpose and its inverse of the structure cell basis, which is needed to
        # transform the parsed rotation matrices, which are in crystal coordinates, to cartesian coordinates,
        # which are the matrices that are returned by the _get_qe_symmetry_list staticmethod
        cell = structure_data['cell']['lattice_vectors']
        cell_T = numpy.transpose(cell)
        cell_Tinv = numpy.linalg.inv(cell_T)

        try:
            if 'symmetries' in out_dict:
                new_symmetries = []

                for this_sym in out_dict['symmetries']:
                    name = this_sym['name'].strip()

                    for i, this in enumerate(self._possible_symmetries):
                        # Since we do an exact comparison we strip the string name from whitespace
                        # and as soon as it is matched, we break to prevent it from matching another
                        if name == this['name'].strip():
                            index = i
                            break
                    else:
                        index = None
                        self.logger.error('Symmetry {} not found'.format(name))

                    new_dict = {}

                    if index is not None:
                        # The raw parsed rotation matrix is in crystal coordinates, whereas the mapped
                        # rotation in self._possible_symmetries is in cartesian coordinates. To allow them
                        # to be compared to make sure we matched the correct rotation symmetry, we first
                        # convert the parsed matrix to cartesian coordinates. For explanation of the method,
                        # see comment above.
                        rotation_cryst = this_sym['rotation']
                        rotation_cart_new = self._possible_symmetries[index]['matrix']
                        rotation_cart_old = numpy.dot(cell_T, numpy.dot(rotation_cryst, cell_Tinv))

                        inversion = self._possible_symmetries[index]['inversion']
                        if not are_matrices_equal(
                                rotation_cart_old, rotation_cart_new, swap_sign_matrix_b=inversion):
                            self.logger.error(
                                'Mapped rotation matrix {} does not match the original rotation {}'
                                .format(rotation_cart_new, rotation_cart_old))
                            # The mapping cannot be trusted: keep the full raw symmetry
                            new_dict['all_symmetries'] = this_sym
                        else:
                            # Note: here I lose the information about equivalent ions and
                            # fractional_translation.
                            new_dict['t_rev'] = this_sym['t_rev']
                            new_dict['symmetry_number'] = index
                    else:
                        new_dict['all_symmetries'] = this_sym

                    new_symmetries.append(new_dict)

                out_dict['symmetries'] = new_symmetries  # and overwrite the old one
        except KeyError:  # no symmetries were parsed (failed case, likely)
            self.logger.error("No symmetries were found in output")

    new_nodes_list = []

    # I eventually save the new structure. structure_data is unnecessary after this
    in_struc = self._calc.get_inputs_dict()['structure']
    type_calc = parameters['CONTROL']['calculation']
    struc = in_struc
    if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
        if 'cell' in structure_data:
            struc = convert_qe2aiida_structure(structure_data, input_structure=in_struc)
            new_nodes_list.append((self.get_linkname_outstructure(), struc))

    k_points_list = trajectory_data.pop('k_points', None)
    k_points_weights_list = trajectory_data.pop('k_points_weights', None)

    if k_points_list is not None:

        # Build the kpoints object
        if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
            raise QEOutputParsingError('Error in kpoints units (should be cartesian)')

        kpoints_from_output = KpointsData()
        kpoints_from_output.set_cell_from_structure(struc)
        kpoints_from_output.set_kpoints(k_points_list, cartesian=True, weights=k_points_weights_list)
        kpoints_from_input = self._calc.inp.kpoints

        if not bands_data:
            # Only attach the parsed k-points as an output node when the input node has no explicit list
            try:
                kpoints_from_input.get_kpoints()
            except AttributeError:
                new_nodes_list.append((self.get_linkname_out_kpoints(), kpoints_from_output))

        # Converting bands into a BandsData object (including the kpoints)
        if bands_data:
            kpoints_for_bands = kpoints_from_output

            try:
                kpoints_from_input.get_kpoints()
                kpoints_for_bands.labels = kpoints_from_input.labels
            except (AttributeError, ValueError, TypeError):
                # AttributeError: no list of kpoints in input
                # ValueError: labels from input do not match the output
                #      list of kpoints (some kpoints are missing)
                # TypeError: labels are not set, so kpoints_from_input.labels=None
                pass

            # Get the bands occupations and correct the occupations of QE:
            # If it computes only one component, it occupies it with half number of electrons
            try:
                bands_data['occupations'][1]
                the_occupations = bands_data['occupations']
            except IndexError:
                the_occupations = 2. * numpy.array(bands_data['occupations'][0])

            try:
                bands_data['bands'][1]
                bands_energies = bands_data['bands']
            except IndexError:
                bands_energies = bands_data['bands'][0]

            the_bands_data = BandsData()
            the_bands_data.set_kpointsdata(kpoints_for_bands)
            the_bands_data.set_bands(
                bands_energies, units=bands_data['bands_units'], occupations=the_occupations)

            new_nodes_list.append(('output_band', the_bands_data))
            out_dict['linknames_band'] = ['output_band']

    # Separate the atomic_occupations dictionary in its own node if it is present
    atomic_occupations = out_dict.get('atomic_occupations', {})
    if atomic_occupations:
        out_dict.pop('atomic_occupations')
        atomic_occupations_node = ParameterData(dict=atomic_occupations)
        new_nodes_list.append(('output_atomic_occupations', atomic_occupations_node))

    output_params = ParameterData(dict=out_dict)
    new_nodes_list.append((self.get_linkname_outparams(), output_params))

    if trajectory_data:
        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.orm.data.array import ArrayData
        try:
            positions = numpy.array(trajectory_data.pop('atomic_positions_relax'))
            try:
                cells = numpy.array(trajectory_data.pop('lattice_vectors_relax'))
            except KeyError:
                # if KeyError, the MD was at fixed cell
                cells = numpy.array([in_struc.cell] * len(positions))

            symbols = numpy.array([str(site.kind_name) for site in in_struc.sites])
            stepids = numpy.arange(len(positions))  # a growing integer per step
            # I will insert time parsing when they fix their issues about time
            # printing (logic is broken if restart is on)

            traj = TrajectoryData()
            traj.set_trajectory(
                stepids=stepids,
                cells=cells,
                symbols=symbols,
                positions=positions,
            )
            # `items()` instead of the Python 2 only `iteritems()` so this works on both major versions
            for array_name, array_values in trajectory_data.items():
                traj.set_array(array_name, numpy.array(array_values))
            new_nodes_list.append((self.get_linkname_outtrajectory(), traj))

        except KeyError:
            # forces, atomic charges and atomic mag. moments, in scf calculation (when outputed)
            arraydata = ArrayData()
            for array_name, array_values in trajectory_data.items():
                arraydata.set_array(array_name, numpy.array(array_values))
            new_nodes_list.append((self.get_linkname_outarray(), arraydata))

    return successful, new_nodes_list