def build_output_parameters(parsed_stdout, parsed_xml):
    """Build the dictionary of output parameters from the raw parsed data.

    The output parameters are based on the union of raw parsed data from the XML and stdout output files. Currently,
    if both raw parsed data dictionaries contain the same key, the stdout version takes precedence, but this should
    not occur as the `parse_stdout` method should already have solved these conflicts.

    :param parsed_stdout: the raw parsed data dictionary from the stdout output file
    :param parsed_xml: the raw parsed data dictionary from the XML output file
    :return: the union of the two raw parsed data dictionaries and information about the parser
    """
    from aiida_quantumespresso.parsers import get_parser_info

    parsed_info = get_parser_info(parser_info_template='aiida-quantumespresso parser pw.x v{}')

    for key in list(parsed_stdout.keys()):
        if key in list(parsed_xml.keys()):
            if parsed_stdout[key] != parsed_xml[key]:
                raise AssertionError(
                    '{} found in both dictionaries with different values: {} vs. {}'.format(
                        key, parsed_stdout[key], parsed_xml[key]
                    )
                )

    parameters = dict(list(parsed_xml.items()) + list(parsed_stdout.items()) + list(parsed_info.items()))

    return parameters
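# Illustrative sketch, not part of the module: the merge above relies on dict()
# of concatenated item lists, where later entries win. With the hypothetical
# dictionaries below, the stdout value for a shared key would take precedence
# (build_output_parameters itself raises AssertionError before that can happen):
#
#     parsed_xml = {'energy': -308.0, 'fermi_energy': 5.12}
#     parsed_stdout = {'energy': -308.53}
#     merged = dict(list(parsed_xml.items()) + list(parsed_stdout.items()))
#     assert merged['energy'] == -308.53  # the stdout entry overwrote the XML one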
def parse_cp_raw_output(out_file, xml_file=None, xml_counter_file=None):
    parser_info = get_parser_info(parser_info_template='aiida-quantumespresso parser cp.x v{}')

    # analyze the xml
    if xml_file is not None:
        try:
            with open(xml_file, 'r') as f:
                xml_lines = f.read()
        except IOError:
            raise QEOutputParsingError('Failed to open xml file: {}.'.format(xml_file))
        # TODO: this function should probably be the same as the pw one;
        # after all, the parser was fault-tolerant
        xml_data = parse_cp_xml_output(xml_lines)
    else:
        parser_info['parser_warnings'].append('Skipping the parsing of the xml file.')
        xml_data = {}

    # analyze the counter file, which keeps info on the steps
    if xml_counter_file is not None:
        try:
            with open(xml_counter_file, 'r') as f:
                xml_counter_lines = f.read()
        except IOError:
            raise QEOutputParsingError('Failed to open xml counter file: {}.'.format(xml_counter_file))
        xml_counter_data = parse_cp_xml_counter_output(xml_counter_lines)
    else:
        xml_counter_data = {}

    # analyze the standard output
    try:
        with open(out_file, 'r') as f:
            out_lines = f.readlines()
    except IOError:
        raise QEOutputParsingError('Failed to open output file: {}.'.format(out_file))

    # understand if the job ended smoothly
    job_successful = False
    for line in reversed(out_lines):
        if 'JOB DONE' in line:
            job_successful = True
            break

    out_data = parse_cp_text_output(out_lines, xml_data)

    for key in out_data.keys():
        if key in xml_data.keys():
            raise AssertionError('{} found in both dictionaries'.format(key))
        if key in xml_counter_data.keys():
            raise AssertionError('{} found in both dictionaries'.format(key))

    # out_data keys take precedence and overwrite xml_data keys,
    # if the same key name is shared by both (but this should not happen!)
    final_data = dict(list(xml_data.items()) + list(out_data.items()) + list(xml_counter_data.items()))

    # TODO: parse the trajectory and save them in a reasonable format

    return final_data, job_successful
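# Illustrative usage of the path-based variant above (file names are
# hypothetical, for demonstration only):
#
#     final_data, job_successful = parse_cp_raw_output(
#         'cp.out',
#         xml_file='data-file.xml',
#         xml_counter_file='print_counter.xml',
#     )
#     # job_successful is True only if 'JOB DONE' appears in the stdout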
def parse_output_base(filecontent, codename=None, message_map=None):
    """Parses the output file of a QE calculation, just checking for basic content like JOB DONE, errors with %%%% etc.

    :param filecontent: a string with the output file content
    :param codename: the string printed both in the header and near the walltime.
        If passed, a few more things are parsed (e.g. code version, walltime, ...)
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    """
    from aiida_quantumespresso.utils.mapping import get_logging_container

    keys = ['error', 'warning']

    if message_map is not None and (not isinstance(message_map, dict) or any(key not in message_map for key in keys)):
        raise RuntimeError('invalid format `message_map`: should be dictionary with two keys {}'.format(keys))

    logs = get_logging_container()
    parsed_data = get_parser_info(parser_info_template='aiida-quantumespresso parser simple v{}')

    lines = filecontent if isinstance(filecontent, list) else filecontent.split('\n')

    for line in lines:
        if 'JOB DONE' in line:
            break
    else:
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    if codename is not None:

        codestring = 'Program {}'.format(codename)

        for line_number, line in enumerate(lines):

            if codestring in line and 'starts on' in line:
                parsed_data['code_version'] = line.split(codestring)[1].split('starts on')[0].strip()

            # Parse the walltime
            if codename in line and 'WALL' in line:
                try:
                    time = line.split('CPU')[1].split('WALL')[0].strip()
                    parsed_data['wall_time'] = time
                except (ValueError, IndexError):
                    logs.warning.append('ERROR_PARSING_WALLTIME')
                else:
                    try:
                        parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(time)
                    except ValueError:
                        logs.warning.append('ERROR_CONVERTING_WALLTIME_TO_SECONDS')

            # Parse an error message with optional mapping of the message
            if '%%%%%%%%%%%%%%' in line:
                parse_output_error(lines, line_number, logs, message_map)

    return parsed_data, logs
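# Sketch of a `message_map` accepted by `parse_output_base`: the validation
# above only requires a dict with the keys 'error' and 'warning'. The entries
# below are hypothetical, assuming a mapping from substrings of the QE error
# block to log message labels:
#
#     message_map = {
#         'error': {'Maximum CPU time exceeded': 'ERROR_OUT_OF_WALLTIME'},
#         'warning': {},
#     }
#     parsed_data, logs = parse_output_base(stdout_content, codename='PWSCF', message_map=message_map)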
def parse_cp_raw_output(out_file, xml_file=None, xml_counter_file=None):
    parser_info = get_parser_info(parser_info_template='aiida-quantumespresso parser cp.x v{}')

    # analyze the xml
    if xml_file is not None:
        xml_data = parse_cp_xml_output(xml_file.read())
    else:
        parser_info['parser_warnings'].append('Skipping the parsing of the xml file.')
        xml_data = {}

    # analyze the counter file, which keeps info on the steps
    if xml_counter_file is not None:
        xml_counter_data = parse_cp_xml_counter_output(xml_counter_file.read())
    else:
        xml_counter_data = {}

    # analyze the standard output
    out_lines = out_file.readlines()

    # understand if the job ended smoothly
    job_successful = any('JOB DONE' in line for line in reversed(out_lines))

    out_data = parse_cp_text_output(out_lines, xml_data)

    for key in out_data.keys():
        if key in list(xml_data.keys()):
            raise AssertionError('%s found in both dictionaries' % key)
        if key in list(xml_counter_data.keys()):
            raise AssertionError('%s found in both dictionaries' % key)

    # out_data keys take precedence and overwrite xml_data keys,
    # if the same key name is shared by both (but this should not happen!)
    final_data = dict(list(xml_data.items()) + list(out_data.items()) + list(xml_counter_data.items()))

    # TODO: parse the trajectory and save them in a reasonable format

    return final_data, job_successful
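# Unlike the path-based variant earlier in this section, this version expects
# file-like objects. A minimal sketch (hypothetical file names, for
# illustration only):
#
#     with open('cp.out') as out_file, open('data-file.xml') as xml_file:
#         final_data, job_successful = parse_cp_raw_output(out_file, xml_file=xml_file)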
def parse_raw_ph_output(stdout, tensors=None, dynamical_matrices=None):
    """Parses the raw output of a Quantum ESPRESSO `ph.x` calculation.

    :param stdout: the content of the stdout file as a string
    :param tensors: the content of the tensors.xml file as a string
    :param dynamical_matrices: a list of the content of the dynamical matrix files as a string
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    """
    logs = get_logging_container()
    data_lines = stdout.split('\n')
    parser_info = get_parser_info(parser_info_template='aiida-quantumespresso parser ph.x v{}')

    # First check whether the `JOB DONE` message was written, otherwise the job was interrupted
    for line in data_lines:
        if 'JOB DONE' in line:
            break
    else:
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    # Parse tensors, if present
    tensor_data = {}
    if tensors:
        try:
            tensor_data = parse_ph_tensor(tensors)
        except QEOutputParsingError:
            logs.warning.append('Error while parsing the tensor files')

    out_data = parse_ph_text_output(data_lines, logs)

    # parse dynamical matrices if present
    dynmat_data = {}
    if dynamical_matrices:
        # find lattice parameter
        for dynmat_counter, dynmat in enumerate(dynamical_matrices):
            lines = dynmat.split('\n')

            # check if the file contains frequencies (i.e. is useful) or not
            dynmat_to_parse = False
            if not lines:
                continue
            try:
                _ = [float(i) for i in lines[0].split()]
            except ValueError:
                dynmat_to_parse = True
            if not dynmat_to_parse:
                continue

            # parse it
            this_dynmat_data = parse_ph_dynmat(lines, logs)

            # join it with the previous dynmat info
            dynmat_data['dynamical_matrix_%s' % dynmat_counter] = this_dynmat_data
            # TODO: use the bands format?

    # join dictionaries, there should not be any repeated key
    for key in out_data.keys():
        if key in list(tensor_data.keys()):
            raise AssertionError('{} found in two dictionaries'.format(key))
        if key in list(dynmat_data.keys()):
            raise AssertionError('{} found in two dictionaries'.format(key))

    # I don't check the dynmat_data and parser_info keys
    parsed_data = dict(
        list(dynmat_data.items()) + list(out_data.items()) + list(tensor_data.items()) + list(parser_info.items()))

    return parsed_data, logs
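# Sketch of a call with the file contents read beforehand (variable names are
# hypothetical); each parsed dynamical matrix ends up under an indexed key:
#
#     parsed_data, logs = parse_raw_ph_output(
#         stdout_content, tensors=tensors_xml_content, dynamical_matrices=[dynmat_0, dynmat_1])
#     # parsed_data then contains e.g. 'dynamical_matrix_0', 'dynamical_matrix_1'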
def parse_raw_output_neb(out_file, input_dict, parser_opts=None):
    """Parses the output of a NEB calculation. Receives in input the path to the output file.

    :param out_file: path to neb std output
    :param input_dict: dictionary with the neb input parameters
    :param parser_opts: not used

    :return parameter_data: a dictionary with parsed parameters
    :return iteration_data: a dictionary with arrays (for relax & md calcs.)
    :return job_successful: a boolean that is False in case of failed calculations
    :raises QEOutputParsingError: for errors in the parsing

    2 different keys to check in output: parser_warnings and warnings.
    On an upper level, these flags MUST be checked. The first is expected to be
    empty unless there are QE failures or unfinished jobs.
    """
    import copy

    job_successful = True
    parser_info = get_parser_info(parser_info_template='aiida-quantumespresso parser neb.x v{}')

    # load NEB output file
    try:
        with open(out_file, 'r') as f:
            out_lines = f.read()
    except IOError:  # non existing output file -> job crashed
        raise QEOutputParsingError('Failed to open output file: {}.'.format(out_file))

    if not out_lines:  # there is an output file, but it's empty -> crash
        job_successful = False

    # check if the job has finished (that doesn't mean without errors)
    finished_run = False
    for line in out_lines.split('\n')[::-1]:
        if 'JOB DONE' in line:
            finished_run = True
            break

    if not finished_run:  # error if the job has not finished
        warning = 'QE neb run did not reach the end of the execution.'
        parser_info['parser_warnings'].append(warning)
        job_successful = False

    # parse the text output of the neb calculation
    try:
        out_data, iteration_data, critical_messages = parse_neb_text_output(out_lines, input_dict)
    except QEOutputParsingError as exc:
        if not finished_run:  # I try to parse it as much as possible
            parser_info['parser_warnings'].append('Error while parsing the output file')
            out_data = {'warnings': []}
            iteration_data = {}
            critical_messages = []
        else:  # if it was finished and I got an error, it's a mistake of the parser
            raise QEOutputParsingError('Error while parsing NEB text output: {}'.format(exc))

    # I add to out_data the last element of each iteration_data value.
    # I leave the possibility to skip some large arrays (none for the time being).
    skip_keys = []
    tmp_iteration_data = copy.copy(iteration_data)
    for k, v in tmp_iteration_data.items():
        if k in skip_keys:
            continue
        out_data[k] = v[-1]

    # if there is a severe error, the calculation is FAILED
    if any([x in out_data['warnings'] for x in critical_messages]):
        job_successful = False

    parameter_data = dict(list(out_data.items()) + list(parser_info.items()))

    # return various data.
    # parameter data will be mapped in Dict
    # iteration_data in ArrayData
    return parameter_data, iteration_data, job_successful
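# Minimal usage sketch (the path and the input dictionary content are
# hypothetical): iteration_data holds per-iteration arrays, while
# parameter_data collects the last value of each of them plus parser metadata:
#
#     parameter_data, iteration_data, job_successful = parse_raw_output_neb(
#         'neb.out', input_dict={'PATH': {'num_of_images': 5}})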
def parse_qe_simple(filecontent, codename=None):
    """
    Parses the output file of a QE calculation, just checking for basic content like JOB DONE, errors with %%%% etc.

    :param filecontent: a string with the output file content
    :param codename: the string printed both in the header and near the walltime.
        If passed, a few more things are parsed (e.g. code version, walltime, ...)
    :return: (successful, out_dict) where successful is a boolean (False if a critical error occurred);
        out_dict is a dictionary with parsed information (e.g. a list of warnings) that could e.g. be
        returned as a Dict by the parser.
    """
    # suppose at the start that the job is successful
    successful = True
    parser_info = get_parser_info(parser_info_template='aiida-quantumespresso parser simple v{}')
    parsed_data = {'warnings': []}
    parsed_data.update(parser_info)
    generic_error_message = "There was an error, please check the 'error_message' key"

    if 'JOB DONE' not in filecontent:
        successful = False
        msg = 'Computation did not finish properly'
        parsed_data['warnings'].append(msg)

    lines = filecontent.split('\n')

    if codename is not None:
        codestring = 'Program {}'.format(codename)

        for count, line in enumerate(lines):
            if codestring in line and 'starts on' in line:
                parsed_data['code_version'] = line.split(codestring)[1].split('starts on')[0].strip()

            # parse the global file, for information that is written only once
            if codename in line and 'WALL' in line:
                try:
                    time = line.split('CPU')[1].split('WALL')[0].strip()
                    parsed_data['wall_time'] = time
                except (ValueError, IndexError):
                    parsed_data['warnings'].append('Error while parsing wall time.')
                else:
                    try:
                        parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(time)
                    except ValueError:
                        raise QEOutputParsingError('Unable to convert wall_time in seconds.')

            if '%%%%%%%%%%%%%%' in line:
                if generic_error_message not in parsed_data['warnings']:
                    parsed_data['warnings'].append(generic_error_message)
                if 'error_message' not in parsed_data:
                    parsed_data['error_message'] = []
                successful = False
                # start parsing the error block from the current line number
                # (anyway, it's a short file) and pass the error messages found
                # so far, to avoid duplication of errors
                messages = parse_QE_errors(lines, count=count, warnings=parsed_data['error_message'])

                # if it found something, add to log
                if len(messages) > 0:
                    parsed_data['error_message'].extend(messages)

    return successful, parsed_data
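# Minimal usage sketch (file name hypothetical): reading the stdout of a pw.x
# run and checking the simple success flag:
#
#     with open('aiida.out') as handle:
#         successful, parsed_data = parse_qe_simple(handle.read(), codename='PWSCF')
#     # on failure, parsed_data['warnings'] and parsed_data.get('error_message')
#     # hold the details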
def parse_raw_output(out_file, input_dict, parser_opts=None, xml_file=None, dir_with_bands=None):
    """
    Parses the output of a calculation. Receives in input the paths to the output file and the xml file.

    :param out_file: path to pw std output
    :param input_dict: not used
    :param parser_opts: not used
    :param dir_with_bands: path to directory with all k-points (Kxxxxx) folders
    :param xml_file: path to QE data-file.xml

    :returns out_dict: a dictionary with parsed data
    :return successful: a boolean that is False in case of failed calculations
    :raises QEOutputParsingError: for errors in the parsing
    :raises AssertionError: if two keys in the parsed dicts are found to be equal

    3 different keys to check in output: parser_warnings, xml_warnings and warnings.
    On an upper level, these flags MUST be checked. The first two are expected to be
    empty unless there are QE failures or unfinished jobs.
    """
    import copy

    # TODO: a lot of ifs could be cleaned out
    # TODO: input_dict should be used as well

    job_successful = True
    parser_info = get_parser_info(parser_info_template='aiida-quantumespresso basic parser pw.x v{}')

    # if xml_file is not given in input, skip its parsing
    if xml_file is not None:
        try:
            with open(xml_file, 'r') as f:
                xml_lines = f.read()  # Note: read() and not readlines()
        except IOError:
            raise QEOutputParsingError('Failed to open xml file: {}.'.format(xml_file))

        # Note: the xml file should always be consistent.
        xml_data, structure_data = parse_pw_xml_output(xml_lines, dir_with_bands)
    else:
        parser_info['parser_warnings'].append('Skipping the parsing of the xml file.')
        xml_data = {}
        bands_data = {}
        structure_data = {}

    # load QE out file
    try:
        with open(out_file, 'r') as f:
            out_lines = f.read()
    except IOError:  # non existing output file -> job crashed
        raise QEOutputParsingError('Failed to open output file: {}.'.format(out_file))

    if not out_lines:  # there is an output file, but it's empty -> crash
        job_successful = False

    # check if the job has finished (that doesn't mean without errors)
    finished_run = False
    for line in out_lines.split('\n')[::-1]:
        if 'JOB DONE' in line:
            finished_run = True
            break

    if not finished_run:  # error if the job has not finished
        warning = 'QE pw run did not reach the end of the execution.'
        parser_info['parser_warnings'].append(warning)
        job_successful = False

    # parse
    try:
        out_data, trajectory_data, critical_messages = parse_pw_text_output(
            out_lines, xml_data, structure_data, input_dict)
    except QEOutputParsingError:
        if not finished_run:  # I try to parse it as much as possible
            parser_info['parser_warnings'].append('Error while parsing the output file')
            out_data = {}
            trajectory_data = {}
            critical_messages = []
        else:  # if it was finished and I got an error, it's a mistake of the parser
            raise QEOutputParsingError('Error while parsing QE output')

    # I add to out_data the last element of each trajectory_data value,
    # except for some large arrays that I will likely never query.
    skip_keys = ['forces', 'lattice_vectors_relax', 'atomic_positions_relax', 'atomic_species_name']
    tmp_trajectory_data = copy.copy(trajectory_data)
    for key, value in tmp_trajectory_data.items():
        if key in skip_keys:
            continue
        out_data[key] = value[-1]
        if len(value) == 1:  # remove keys that are not really arrays (scf cycles)
            trajectory_data.pop(key)
        # note: if an array is empty, there will be a KeyError

    # As the k-points are a rather large array that I am unlikely to parse later,
    # since that info is mainly contained in the input file, I move it to the trajectory data
    for key in ['k_points', 'k_points_weights']:
        try:
            trajectory_data[key] = xml_data.pop(key)
        except KeyError:
            pass

    # if there is a severe error, the calculation is FAILED
    if any([x in out_data['warnings'] for x in critical_messages]):
        job_successful = False

    for key in list(out_data.keys()):
        if key in list(xml_data.keys()):
            if key == 'fermi_energy' or key == 'fermi_energy_units':
                # an exception for the (only?) key that may be found in both
                del out_data[key]
            else:
                # this shouldn't happen!
                raise AssertionError(
                    '{} found in both dictionaries, values: {} vs. {}'.format(key, out_data[key], xml_data[key]))

    # out_data keys take precedence and overwrite xml_data keys,
    # if the same key name is shared by both dictionaries (but this should not happen!)
    parameter_data = dict(list(xml_data.items()) + list(out_data.items()) + list(parser_info.items()))

    # return various data.
    # parameter data will be mapped in ParameterData
    # trajectory_data in ArrayData
    # structure_data in a Structure
    # bands_data should probably be merged in ArrayData
    return parameter_data, trajectory_data, structure_data, job_successful
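# Minimal usage sketch for the full pw.x raw parser above (paths hypothetical):
#
#     parameter_data, trajectory_data, structure_data, job_successful = parse_raw_output(
#         'aiida.out', input_dict={}, xml_file='data-file.xml', dir_with_bands=None)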