def test_outxml_valid_outxml(outxmlfilepath):
    """
    test if valid inp.xml files are recognized by the inpxml_parser
    """
    from lxml import etree

    def _assert_parsed(result):
        # Shared sanity checks for every way of invoking the parser
        assert result is not None
        assert isinstance(result, dict)
        assert result != {}

    #Pass the file path directly
    _assert_parsed(outxml_parser(outxmlfilepath))

    #Parse the xml tree beforehand and hand it over
    parser = etree.XMLParser(attribute_defaults=True, encoding='utf-8')
    xmltree = etree.parse(outxmlfilepath, parser)
    _assert_parsed(outxml_parser(xmltree))

    #call with an open file handle via contextmanager
    with open(outxmlfilepath, 'r') as outfile:
        _assert_parsed(outxml_parser(outfile))
def test_outxml_additional_tasks_allattribs(data_regression):
    """
    Test the definition of additional tasks (reading an all attributes of a tag)
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'SiLOXML/files/out.xml')

    #Valid task definition: read all attributes of the coreStates tag
    valid_task = {
        'core_states': {
            'core_info': {
                'parse_type': 'allAttribs',
                'path_spec': {
                    'name': 'coreStates'
                },
                'subdict': 'core_info'
            }
        }
    }

    result = outxml_parser(outxml_file, additional_tasks=valid_task)

    data_regression.check({
        'output_dict': result,
    })

    #'overwrite_last' is not an allowed key for the allAttribs parse type
    invalid_task = {
        'core_states': {
            'core_info': {
                'parse_type': 'allAttribs',
                'path_spec': {
                    'name': 'coreStates'
                },
                'overwrite_last': True
            }
        }
    }

    with pytest.raises(ValueError, match="Got extra Keys: {'overwrite_last'}"):
        outxml_parser(outxml_file, additional_tasks=invalid_task)
def test_outxml_add_tasks_overwrite(data_regression):
    """
    Test the overwriting of tasks
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'SiLOXML/files/out.xml')

    replacement_task = {
        'bandgap': {
            'bandgap': {
                'parse_type': 'singleValue',
                'path_spec': {
                    'name': 'freeEnergy'
                },
                'only_required': True
            }
        }
    }

    #Without overwrite the clash with the built-in 'bandgap' task is an error
    with pytest.raises(ValueError, match="Task 'bandgap' is already defined."):
        outxml_parser(outxml_file, additional_tasks=replacement_task)

    result = outxml_parser(outxml_file, additional_tasks=replacement_task, overwrite=True)

    data_regression.check({
        'output_dict': result,
    })
def test_outxml_add_tasks_append(data_regression):
    """
    Test the append option for defining additional tasks
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'SiLOXML/files/out.xml')

    #Move the number_of_atom_types from general_out_info to general_inp_info
    #and write the comment from the inp.xml into it
    #This tests both the correct insertin in general_inp_info and that inner keys can be
    #overwritten in general_out_info
    append_task = {
        'general_out_info': {
            'number_of_atom_types': {}
        },
        'general_inp_info': {
            'number_of_atom_types': {
                'parse_type': 'text',
                'path_spec': {
                    'name': 'comment'
                }
            }
        }
    }

    #Without append the clash with the existing task is an error
    with pytest.raises(ValueError, match="Task 'general_out_info' is already defined."):
        outxml_parser(outxml_file, additional_tasks=append_task)

    result = outxml_parser(outxml_file, additional_tasks=append_task, append=True)

    data_regression.check({
        'output_dict': result,
    })
def test_outxml_invalid_iteration(): """ Test the output parser with invaid iteration to parse arguments """ #output version does not exist (InputSchema is loaded first so this is the raised error) OUTXML_FILEPATH = os.path.join(outxmlfilefolder_valid[0], 'SiLOXML/files/out.xml') with pytest.raises(ValueError, match=r"Valid values are: 'first', 'last', 'all', or int"): out_dict = outxml_parser(OUTXML_FILEPATH, iteration_to_parse=('Test', 3)) pprint(out_dict) with pytest.raises(ValueError, match=r"Got '999'; but only '6' iterations are available"): out_dict = outxml_parser(OUTXML_FILEPATH, iteration_to_parse=999)
def test_outxml_incompatible_versions(): """ Test the output parser against files with broken/wrong or unsupported version strings """ #output version does not exist OUTXML_FILEPATH1 = os.path.join(outxmlfilefolder, 'files/fleur/broken_out_xml/non_existing_version.xml') with pytest.raises(FileNotFoundError, match='No FleurOutputSchema.xsd found'): out_dict = outxml_parser(OUTXML_FILEPATH1) #version string 0.27 and programVersion='fleur 27' not supported OUTXML_FILEPATH1 = os.path.join(outxmlfilefolder, 'files/fleur/broken_out_xml/non_supported_version.xml') with pytest.raises(ValueError, match="Unknown fleur version: File-version '0.27' Program-version 'fleur 20'"): out_dict = outxml_parser(OUTXML_FILEPATH1)
def test_outxml_validation_errors(data_regression, clean_parser_log):
    """
    Test the output parser against files for detecting validation
    """
    broken_file = os.path.join(outxmlfilefolder, 'files/fleur/broken_out_xml/simple_validation_error.xml')

    #Strict mode: the validation error is fatal
    with pytest.raises(ValueError, match='Output file does not validate against the schema:'):
        outxml_parser(broken_file)

    #With ignore_validation the error only ends up in the parser log
    parser_log = {}
    result = outxml_parser(broken_file, ignore_validation=True, parser_info_out=parser_log)

    data_regression.check({'output_dict': result, 'warnings': clean_parser_log(parser_log)})
def test_outxml_empty_out(data_regression, clean_parser_log):
    """
    Test the output parser against empty file
    """
    empty_file = os.path.join(outxmlfilefolder, 'files/fleur/broken_out_xml/empty_out.xml')

    parser_log = {}
    result = outxml_parser(empty_file, parser_info_out=parser_log)

    data_regression.check({'output_dict': result, 'warnings': clean_parser_log(parser_log)})
def test_outxml_ldaurelax(data_regression):
    """
    Test the outxml_parser for LDA+U and forces
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'GaAsMultiUForceXML/files/out.xml')

    result = outxml_parser(outxml_file, iteration_to_parse='all')

    data_regression.check({
        'output_dict': result,
    })
def test_outxml_minimal_mode(data_regression):
    """
    Test the minimal mode of the outxml_parser
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'SiLOXML/files/out.xml')

    result = outxml_parser(outxml_file, iteration_to_parse='all', minimal_mode=True)

    data_regression.check({
        'output_dict': result,
    })
def test_outxml_magnetic(data_regression):
    """
    Test the outxml_parser for magnetic calculations
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'Fe_bct_LOXML/files/out.xml')

    result = outxml_parser(outxml_file, iteration_to_parse='all')

    data_regression.check({
        'output_dict': result,
    })
def test_outxml_alliter(data_regression):
    """
    Test the parsing of all available iterations
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'SiLOXML/files/out.xml')

    result = outxml_parser(outxml_file, iteration_to_parse='all')

    data_regression.check({
        'output_dict': result,
    })
def test_outxml_indexiter(data_regression):
    """
    Test the parsing of an iteration specified by index
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'SiLOXML/files/out.xml')

    result = outxml_parser(outxml_file, iteration_to_parse=3)

    data_regression.check({
        'output_dict': result,
    })
def test_outxml_firstiter(data_regression):
    """
    Test the parsing of only the first iteration
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'SiLOXML/files/out.xml')

    result = outxml_parser(outxml_file, iteration_to_parse='first')

    data_regression.check({
        'output_dict': result,
    })
def test_outxml_broken_firstiter(data_regression, clean_parser_log):
    """
    Test the output parser against a file which terminates in the first iteration
    """
    outxml_file = os.path.join(outxmlfilefolder, 'files/fleur/broken_out_xml/terminated_firstit.xml')

    parser_log = {}
    result = outxml_parser(outxml_file, parser_info_out=parser_log)

    data_regression.check({
        'output_dict': result,
        'warnings': clean_parser_log(parser_log),
    })
def test_outxml_plot(data_regression):
    """
    Test the outxml_parser for a calculation with slice-plot output
    (SiFilmSlicePlotXML fixture; original docstring was a copy-paste of the
    forcetheorem test — TODO confirm intended description)
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'SiFilmSlicePlotXML/files/out.xml')

    result = outxml_parser(outxml_file)

    data_regression.check({
        'output_dict': result,
    })
def test_outxml_force(data_regression):
    """
    Test the outxml_parser for a forcetheorem calculation
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'FePt_film_SSFT_LO/files/out.xml')

    result = outxml_parser(outxml_file, iteration_to_parse='all')

    data_regression.check({
        'output_dict': result,
    })
def test_outxml_differing_versions(data_regression, clean_parser_log):
    """
    Test if files with different input/output versions are parsed correctly
    """
    outxml_file = os.path.abspath(os.path.join(outxmlfilefolder, 'files/fleur/output_mixed_versions.xml'))

    parser_log = {}
    result = outxml_parser(outxml_file, parser_info_out=parser_log)

    data_regression.check({
        'output_dict': result,
        'warnings': clean_parser_log(parser_log),
    })
def test_outxml_max5_0_compatibility(data_regression, clean_parser_log):
    """
    Test if Max5.0 output files are processed correctly
    """
    outxml_file = os.path.abspath(os.path.join(outxmlfilefolder, 'files/fleur/old_versions/Max5_0_test_out.xml'))

    parser_log = {}
    result = outxml_parser(outxml_file, parser_info_out=parser_log, iteration_to_parse='all')

    data_regression.check({
        'output_dict': result,
        'warnings': clean_parser_log(parser_log),
    })
def test_outxml_newer_version(data_regression, clean_parser_log):
    """
    Test if files with not yet existent versions are parsed correctly (fallback to last available)
    """
    outxml_file = os.path.abspath(os.path.join(outxmlfilefolder, 'files/fleur/output_newer_version.xml'))

    parser_log = {}
    result = outxml_parser(outxml_file, parser_info_out=parser_log)

    data_regression.check({
        'output_dict': result,
        'warnings': clean_parser_log(parser_log),
    })
def test_outxml_pre_max3_1compatibility(data_regression, clean_parser_log):
    """
    Test if older than Max3.1 output files are processed correctly
    (and a warning should be shown for this case)
    """
    outxml_file = os.path.abspath(os.path.join(outxmlfilefolder, 'files/fleur/old_versions/Max3_0_test_out.xml'))

    parser_log = {}
    #Old file versions are expected to trigger a UserWarning
    with pytest.warns(UserWarning):
        result = outxml_parser(outxml_file, parser_info_out=parser_log, iteration_to_parse='all')

    data_regression.check({
        'output_dict': result,
        'warnings': clean_parser_log(parser_log),
    })
def test_outxml_additional_tasks_simple(data_regression):
    """
    Test the definition of additional tasks (reading an attribute)
    """
    outxml_file = os.path.join(outxmlfilefolder_valid[0], 'SiLOXML/files/out.xml')

    #Valid task: read the eigValSum attribute
    valid_task = {
        'core_states': {
            'core_eig_val_sum': {
                'parse_type': 'attrib',
                'path_spec': {
                    'name': 'eigValSum'
                }
            }
        }
    }

    result = outxml_parser(outxml_file, additional_tasks=valid_task)

    data_regression.check({
        'output_dict': result,
    })

    #'ignore' is not an allowed key for the attrib parse type
    invalid_task = {
        'core_states': {
            'core_eig_val_sum': {
                'parse_type': 'attrib',
                'path_spec': {
                    'name': 'eigValSum'
                },
                'ignore': ['eigValSum']
            }
        }
    }

    with pytest.raises(ValueError, match="Got extra Keys: {'ignore'}"):
        outxml_parser(outxml_file, additional_tasks=invalid_task)
def test_outxml_garbage_values(data_regression, clean_parser_log):
    """
    Test the behaviour of the output parser when encountering NaN, Inf or
    fortran formatting errors ****
    """
    OUTXML_FILEPATH = os.path.join(outxmlfilefolder, 'files/fleur/broken_out_xml/garbage_values.xml')

    warnings = {}
    out_dict = outxml_parser(OUTXML_FILEPATH, ignore_validation=True, parser_info_out=warnings)

    #NaN never compares equal to itself, so use math.isnan directly
    #(replaces a local isNaN helper that just wrapped math.isnan)
    assert math.isnan(out_dict['fermi_energy'])
    #Fortran overflow markers (****) are passed through as the literal string
    assert out_dict['magnetic_moments'] == '********'
    assert out_dict['total_charge'] == float('Inf')

    data_regression.check({'warnings': clean_parser_log(warnings)})
def get_res(self):
    """
    Check how the last Fleur calculation went
    Parse some results.

    Collects total energies, density distances, LDA+U nmmp distances and
    (in force mode) atomic forces from the minimal-mode out.xml parse and
    appends them to the workchain context.
    """
    self.report('INFO: get results FLEUR')

    mode = self.ctx.wf_dict.get('mode')
    if self.ctx.parse_last:
        last_base_wc = self.ctx.last_base_wc
        fleur_calcjob = load_node(find_last_submitted_calcjob(last_base_wc))
        walltime = last_base_wc.outputs.output_parameters.dict.walltime
        # only accumulate walltime when the scheduler actually reported one
        if isinstance(walltime, int):
            self.ctx.total_wall_time = self.ctx.total_wall_time + walltime

        # minimal_mode/list_return: parse all iterations cheaply, values come back as lists
        with fleur_calcjob.outputs.retrieved.open(fleur_calcjob.process_class._OUTXML_FILE_NAME,
                                                  'r') as outxmlfile:
            output_dict = outxml_parser(outxmlfile,
                                        minimal_mode=True,
                                        list_return=True,
                                        iteration_to_parse='all',
                                        ignore_validation=True)

        energies = output_dict.get('energy_hartree', [])
        if energies is not None:
            self.ctx.total_energy.extend(energies)

        # prefer the overall convergence measure when present (e.g. magnetic calculations)
        if 'overall_density_convergence' in output_dict:
            distances = output_dict['overall_density_convergence']
        else:
            distances = output_dict.get('density_convergence', [])

        if distances is not None:
            self.ctx.distance.extend(distances)

        if 'ldau_info' in output_dict:
            nmmp_distances = output_dict['ldau_info'].get('nmmp_distances', [])
            if nmmp_distances is not None:
                self.ctx.nmmp_distance.extend(nmmp_distances)

        if mode == 'force':
            forces = output_dict.get('force_atoms', [])
            if forces is not None:
                for force_iter in forces:
                    # drop the atom labels, keep only the force vectors
                    # assumes force_iter is a list of (atom, force) pairs — TODO confirm
                    self.ctx.all_forces.append([force for atom, force in force_iter])
    else:
        errormsg = 'ERROR: scf wc was not successful, check log for details'
        self.control_end_wc(errormsg)
        return self.exit_codes.ERROR_FLEUR_CALCULATION_FAILED

    if not self.ctx.distance:
        # if fleur relaxes an already converged crystal it stops directly
        if mode == 'force':
            self.report('INFO: System already force converged, could not extract distance.')
            self.ctx.last_charge_density = None
        else:
            errormsg = 'ERROR: did not manage to extract charge density from the calculation'
            self.control_end_wc(errormsg)
            return self.exit_codes.ERROR_FLEUR_CALCULATION_FAILED
    else:
        self.ctx.last_charge_density = self.ctx.distance[-1]

    if self.ctx.nmmp_distance:
        # presumably the last entry holds per-species distances; keep the maximum
        self.ctx.last_nmmp_distance = max(self.ctx.nmmp_distance[-1])
def parse(self, **kwargs):
    """
    Receives in input a dictionary of retrieved nodes. Does all the logic here.
    Checks presents of files. Calls routines to parse them and returns parameter nodes and success.

    :return successful: Bool, if overall parsing was successful or not
    :return new_nodes_list: list of tuples of two (linkname, Dataobject),
                            nodes to be stored by AiiDA
    """
    ####### init some variables ######

    # these files should be at least present after success of a Fleur run
    calc = self.node
    FleurCalculation = calc.process_class

    # this files should be retrieved
    should_retrieve = calc.get_attribute('retrieve_list')

    has_xml_outfile = False
    has_dos_file = False
    has_bands_file = False
    has_relax_file = False

    dos_file = None
    band_file = None

    ######### Check presence of files ######

    # select the folder object
    # Check that the retrieved folder is there
    try:
        output_folder = self.retrieved
    except NotExistent:
        self.logger.error('No retrieved folder found')
        return self.exit_codes.ERROR_NO_RETRIEVED_FOLDER

    # check what is inside the folder
    list_of_files = output_folder.list_object_names()
    self.logger.info('file list {}'.format(list_of_files))

    # has output xml file, otherwise error
    if FleurCalculation._OUTXML_FILE_NAME not in list_of_files:
        self.logger.error("XML out not found '{}'".format(FleurCalculation._OUTXML_FILE_NAME))
        return self.exit_codes.ERROR_NO_OUTXML
    else:
        has_xml_outfile = True

    # check if all files expected are there for the calculation
    for file in should_retrieve:
        if file not in list_of_files:
            self.logger.warning("'{}' file not found in retrived folder, it"
                                ' was probably not created by fleur'.format(file))

    # check if something was written to the error file
    if FleurCalculation._ERROR_FILE_NAME in list_of_files:
        errorfile = FleurCalculation._ERROR_FILE_NAME
        # read
        try:
            with output_folder.open(errorfile, 'r') as efile:
                error_file_lines = efile.read()  # Note: read(), not readlines()
        except IOError:
            self.logger.error('Failed to open error file: {}.'.format(errorfile))
            return self.exit_codes.ERROR_OPENING_OUTPUTS

        if error_file_lines:
            # strip NUL bytes which fleur sometimes writes into stderr
            if isinstance(error_file_lines, type(b'')):
                error_file_lines = error_file_lines.replace(b'\x00', b' ')
            else:
                error_file_lines = error_file_lines.replace('\x00', ' ')

            if 'Run finished successfully' not in error_file_lines:
                self.logger.warning('The following was written into std error and piped to {}'
                                    ' : \n {}'.format(errorfile, error_file_lines))
                self.logger.error('FLEUR calculation did not finish' ' successfully.')

                # here we estimate how much memory was available and consumed
                mpiprocs = self.node.get_attribute('resources').get('num_mpiprocs_per_machine', 1)

                kb_used = 0.0
                with output_folder.open(FleurCalculation._OUTXML_FILE_NAME, 'r') as out_file:
                    # lazy out.xml parsing
                    outlines = out_file.read()

                try:
                    line_avail = re.findall(r'<mem memoryPerNode="\d+', outlines)[0]
                    mem_kb_avail = int(re.findall(r'\d+', line_avail)[0])
                except IndexError:
                    mem_kb_avail = 1.0
                    self.logger.info('Did not manage to find memory available info.')
                else:
                    # prefer the json usage file when fleur wrote one, fall back to stderr
                    usage_json = FleurCalculation._USAGE_FILE_NAME
                    if usage_json in list_of_files:
                        with output_folder.open(usage_json, 'r') as us_file:
                            usage = json.load(us_file)
                        kb_used = usage['data']['VmPeak']
                    else:
                        try:
                            line_used = re.findall(r'used.+', error_file_lines)[0]
                            kb_used = int(re.findall(r'\d+', line_used)[2])
                        except IndexError:
                            self.logger.info('Did not manage to find memory usage info.')

                # here we estimate how much walltime was available and consumed
                try:
                    time_avail_sec = self.node.attributes['last_job_info']['requested_wallclock_time_seconds']
                    time_calculated = self.node.attributes['last_job_info']['wallclock_time_seconds']
                    if 0.97 * time_avail_sec < time_calculated:
                        return self.exit_codes.ERROR_TIME_LIMIT
                except KeyError:
                    pass

                if (kb_used * mpiprocs / mem_kb_avail > 0.93 or 'cgroup out-of-memory handler' in error_file_lines or
                        'Out Of Memory' in error_file_lines):
                    return self.exit_codes.ERROR_NOT_ENOUGH_MEMORY
                elif 'TIME LIMIT' in error_file_lines or 'time limit' in error_file_lines:
                    return self.exit_codes.ERROR_TIME_LIMIT
                elif 'Atom spills out into vacuum during relaxation' in error_file_lines:
                    return self.exit_codes.ERROR_VACUUM_SPILL_RELAX
                elif 'Error checking M.T. radii' in error_file_lines:
                    return self.exit_codes.ERROR_MT_RADII
                elif 'Overlapping MT-spheres during relaxation: ' in error_file_lines:
                    overlap_line = re.findall(r'\S+ +\S+ olap: +\S+', error_file_lines)[0].split()
                    with output_folder.open('relax.xml', 'r') as rlx:
                        schema_dict = InputSchemaDict.fromVersion('0.34')
                        relax_dict = parse_relax_file(rlx, schema_dict)
                        it_number = len(relax_dict['energies']) + 1  # relax.xml was not updated

                    error_params = {
                        'error_name': 'MT_OVERLAP_RELAX',
                        'description': ('This output node contains information'
                                        'about FLEUR error'),
                        'overlapped_indices': overlap_line[:2],
                        'overlaping_value': overlap_line[3],
                        'iteration_number': it_number
                    }
                    error_params = Dict(dict=error_params)
                    self.out('error_params', error_params)
                    return self.exit_codes.ERROR_MT_RADII_RELAX
                elif 'parent_folder' in calc.inputs:
                    # problem in reusing cdn for relaxations, drop cdn
                    if 'fleurinpdata' in calc.inputs:
                        if 'relax.xml' in calc.inputs.fleurinpdata.files:
                            return self.exit_codes.ERROR_DROP_CDN
                    return self.exit_codes.ERROR_FLEUR_CALC_FAILED
                else:
                    return self.exit_codes.ERROR_FLEUR_CALC_FAILED

    # BUGFIX: these previously set 'has_dos'/'has_bands', which were never read,
    # so the DOS/band files below were silently skipped
    if FleurCalculation._DOS_FILE_NAME in list_of_files:
        has_dos_file = True
    if FleurCalculation._BAND_FILE_NAME in list_of_files:
        has_bands_file = True

    # if a relax.xml was retrieved
    if FleurCalculation._RELAX_FILE_NAME in list_of_files:
        self.logger.info('relax.xml file found in retrieved folder')
        has_relax_file = True

    ####### Parse the files ########

    if has_xml_outfile:
        # open output file
        with output_folder.open(FleurCalculation._OUTXML_FILE_NAME, 'rb') as outxmlfile_opened:
            success = True
            parser_info = {}
            try:
                out_dict = outxml_parser(outxmlfile_opened, parser_info_out=parser_info, ignore_validation=True)
            except (ValueError, FileNotFoundError, KeyError) as exc:
                self.logger.error(f'XML output parsing failed: {str(exc)}')
                success = False

        # Call routines for output node creation
        if not success:
            self.logger.error('Parsing of XML output file was not successfull.')
            parameter_data = dict(list(parser_info.items()))
            outxml_params = Dict(dict=parameter_data)
            link_name = self.get_linkname_outparams()
            self.out(link_name, outxml_params)
            return self.exit_codes.ERROR_XMLOUT_PARSING_FAILED
        elif out_dict:
            outputdata = dict(list(out_dict.items()) + list(parser_info.items()))
            outxml_params = Dict(dict=outputdata)
            link_name = self.get_linkname_outparams()
            self.out(link_name, outxml_params)
        else:
            self.logger.error('Something went wrong, no out_dict found')
            parameter_data = dict(list(parser_info.items()))
            outxml_params = Dict(dict=parameter_data)
            link_name = self.get_linkname_outparams()
            self.out(link_name, outxml_params)

    # optional parse other files
    # DOS
    if has_dos_file:
        dos_file = FleurCalculation._DOS_FILE_NAME
        # if dos_file is not None:
        try:
            with output_folder.open(dos_file, 'r') as dosf:
                dos_lines = dosf.read()  # Note: read() and not readlines()
        except IOError:
            self.logger.error('Failed to open DOS file: {}.'.format(dos_file))
            return self.exit_codes.ERROR_OPENING_OUTPUTS
        dos_data = parse_dos_file(dos_lines)  # , number_of_atom_types)

    # Bands
    if has_bands_file:
        # TODO: be carefull there might be two files.
        band_file = FleurCalculation._BAND_FILE_NAME

        # if band_file is not None:
        try:
            with output_folder.open(band_file, 'r') as bandf:
                bands_lines = bandf.read()  # Note: read() and not readlines()
        except IOError:
            self.logger.error('Failed to open bandstructure file: {}.'
                              ''.format(band_file))
            return self.exit_codes.ERROR_OPENING_OUTPUTS
        bands_data = parse_bands_file(bands_lines)

    if has_relax_file:
        relax_name = FleurCalculation._RELAX_FILE_NAME
        try:
            fleurinp = calc.inputs.fleurinpdata
        except NotExistent:
            old_relax_text = ''
        else:
            if relax_name in fleurinp.list_object_names():
                with fleurinp.open(relax_name, 'r') as rlx:
                    old_relax_text = rlx.read()
            else:
                old_relax_text = ''

        inp_version = outxml_params.get_dict().get('input_file_version', '0.34')
        schema_dict = InputSchemaDict.fromVersion(inp_version)

        # dummy comparison between old and new relax
        with output_folder.open(relax_name, 'rb') as rlx:
            new_relax_text = rlx.read()
            # BUGFIX: the file is opened binary while old_relax_text was read as str,
            # so the two could never compare equal — decode before comparing
            if new_relax_text.decode('utf-8') != old_relax_text:
                # BUGFIX: rewind the handle, read() above left it at EOF
                rlx.seek(0)
                try:
                    relax_dict = parse_relax_file(rlx, schema_dict)
                except etree.XMLSyntaxError:
                    return self.exit_codes.ERROR_RELAX_PARSING_FAILED
                self.out('relax_parameters', relax_dict)