# Example 1
    def build_output_parameters(parsed_stdout, parsed_xml):
        """Build the dictionary of output parameters from the raw parsed data.

        The output parameters are based on the union of raw parsed data from the XML and stdout output files.
        Currently, if both raw parsed data dictionaries contain the same key, the stdout version takes precedence, but
        this should not occur as the `parse_stdout` method should already have solved these conflicts.

        :param parsed_stdout: the raw parsed data dictionary from the stdout output file
        :param parsed_xml: the raw parsed data dictionary from the XML output file
        :return: the union of the two parsed raw and information about the parser
        """
        from aiida_quantumespresso.parsers import get_parser_info

        parsed_info = get_parser_info(parser_info_template='aiida-quantumespresso parser pw.x v{}')

        # Any key present in both raw dictionaries must carry the same value; a mismatch
        # indicates a conflict that should already have been resolved upstream.
        for key in list(parsed_stdout.keys()):
            if key in parsed_xml and parsed_stdout[key] != parsed_xml[key]:
                raise AssertionError(
                    '{} found in both dictionaries with different values: {} vs. {}'.format(
                        key, parsed_stdout[key], parsed_xml[key]
                    )
                )

        # Merge with later sources overriding earlier ones: stdout wins over XML,
        # and the parser info wins over both.
        parameters = {}
        parameters.update(parsed_xml)
        parameters.update(parsed_stdout)
        parameters.update(parsed_info)

        return parameters
def parse_cp_raw_output(out_file, xml_file=None, xml_counter_file=None):
    """Parse the raw output files of a Quantum ESPRESSO cp.x calculation.

    :param out_file: path to the cp.x standard output file
    :param xml_file: optional path to the XML output file
    :param xml_counter_file: optional path to the XML counter file, which keeps info on the steps
    :return: tuple of the parsed data dictionary and a boolean that is True if `JOB DONE` was printed
    :raises QEOutputParsingError: if any of the given files cannot be opened
    """
    parser_info = get_parser_info(parser_info_template='aiida-quantumespresso parser cp.x v{}')

    # analyze the xml
    if xml_file is not None:
        try:
            with open(xml_file, 'r') as f:
                xml_lines = f.read()
        except IOError:
            # Fixed: the message used a `%s` placeholder with `.format()`, so the
            # filename was silently dropped from the error text.
            raise QEOutputParsingError('Failed to open xml file: {}.'.format(xml_file))
        # TODO: this function should probably be the same of pw.
        # after all, the parser was fault-tolerant
        xml_data = parse_cp_xml_output(xml_lines)
    else:
        parser_info['parser_warnings'].append('Skipping the parsing of the xml file.')
        xml_data = {}

    # analyze the counter file, which keeps info on the steps
    if xml_counter_file is not None:
        try:
            with open(xml_counter_file, 'r') as f:
                xml_counter_lines = f.read()
        except IOError:
            # Fixed: same `%s`/.format mix-up, and the message wrongly reported
            # `xml_file` instead of the counter file that actually failed to open.
            raise QEOutputParsingError('Failed to open xml counter file: {}.'.format(xml_counter_file))
        xml_counter_data = parse_cp_xml_counter_output(xml_counter_lines)
    else:
        xml_counter_data = {}

    # analyze the standard output
    try:
        with open(out_file, 'r') as f:
            out_lines = f.readlines()
    except IOError:
        raise QEOutputParsingError('Failed to open output file: {}.'.format(out_file))

    # understand if the job ended smoothly: `JOB DONE` is printed at the end of a clean run
    job_successful = any('JOB DONE' in line for line in out_lines)

    out_data = parse_cp_text_output(out_lines, xml_data)

    for key in out_data.keys():
        if key in xml_data.keys():
            raise AssertionError('%s found in both dictionaries' % key)
        if key in xml_counter_data.keys():
            raise AssertionError('%s found in both dictionaries' % key)
        # out_data keys take precedence and overwrite xml_data keys,
        # if the same key name is shared by both (but this should not happen!)

    # Wrap the views in list() so the concatenation also works under Python 3,
    # where `dict.items()` views do not support the `+` operator.
    final_data = dict(list(xml_data.items()) + list(out_data.items()) + list(xml_counter_data.items()))

    # TODO: parse the trajectory and save them in a reasonable format

    return final_data, job_successful
# Example 3
def parse_output_base(filecontent, codename=None, message_map=None):
    """Parses the output file of a QE calculation, just checking for basic content like JOB DONE, errors with %%%% etc.

    :param filecontent: a string with the output file content
    :param codename: the string printed both in the header and near the walltime.
        If passed, a few more things are parsed (e.g. code version, walltime, ...)
    :param message_map: optional dictionary that must contain an `error` and a `warning` key, used by
        `parse_output_error` to map messages found in the stdout to log messages
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    """
    from aiida_quantumespresso.utils.mapping import get_logging_container

    keys = ['error', 'warning']

    if message_map is not None and (not isinstance(message_map, dict) or any(key not in message_map for key in keys)):
        raise RuntimeError('invalid format `message_map`: should be dictionary with two keys {}'.format(keys))

    logs = get_logging_container()
    parsed_data = get_parser_info(parser_info_template='aiida-quantumespresso parser simple v{}')

    lines = filecontent if isinstance(filecontent, list) else filecontent.split('\n')

    # The `for ... else` fires only when no line contained `JOB DONE`,
    # i.e. the run did not reach a clean end.
    for line in lines:
        if 'JOB DONE' in line:
            break
    else:
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    if codename is not None:

        codestring = 'Program {}'.format(codename)

        for line_number, line in enumerate(lines):

            if codestring in line and 'starts on' in line:
                parsed_data['code_version'] = line.split(codestring)[1].split('starts on')[0].strip()

            # Parse the walltime
            if codename in line and 'WALL' in line:
                try:
                    time = line.split('CPU')[1].split('WALL')[0].strip()
                    parsed_data['wall_time'] = time
                except (ValueError, IndexError):
                    # Fixed: used `logs.warnings` (plural) which is inconsistent with the
                    # `keys = ['error', 'warning']` contract above and with `logs.error`.
                    logs.warning.append('ERROR_PARSING_WALLTIME')
                else:
                    try:
                        parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(time)
                    except ValueError:
                        logs.warning.append('ERROR_CONVERTING_WALLTIME_TO_SECONDS')

            # Parse an error message with optional mapping of the message
            if '%%%%%%%%%%%%%%' in line:
                parse_output_error(lines, line_number, logs, message_map)

    return parsed_data, logs
# Example 4
def parse_cp_raw_output(out_file, xml_file=None, xml_counter_file=None):
    """Parse the raw output of a cp.x calculation from already-opened file handles.

    :param out_file: file handle of the standard output
    :param xml_file: optional file handle of the XML output file
    :param xml_counter_file: optional file handle of the XML counter file, which keeps info on the steps
    :return: tuple of the parsed data dictionary and a boolean that is True if `JOB DONE` was printed
    """
    parser_info = get_parser_info(parser_info_template='aiida-quantumespresso parser cp.x v{}')

    # Parse the XML output, if a handle was provided
    if xml_file is None:
        parser_info['parser_warnings'].append('Skipping the parsing of the xml file.')
        xml_data = {}
    else:
        xml_data = parse_cp_xml_output(xml_file.read())

    # Parse the counter file, which keeps info on the steps, if a handle was provided
    if xml_counter_file is None:
        xml_counter_data = {}
    else:
        xml_counter_data = parse_cp_xml_counter_output(xml_counter_file.read())

    # Parse the standard output
    stdout_lines = out_file.readlines()

    # The job ended smoothly only if `JOB DONE` was printed (scan from the end,
    # since it appears near the bottom of the file)
    job_successful = any('JOB DONE' in line for line in reversed(stdout_lines))

    out_data = parse_cp_text_output(stdout_lines, xml_data)

    # The parsed dictionaries should not share any key; a collision is a parser bug
    for key in out_data.keys():
        if key in xml_data or key in xml_counter_data:
            raise AssertionError('%s found in both dictionaries' % key)
    # out_data keys take precedence and overwrite xml_data keys,
    # if the same key name is shared by both (but this should not happen!)

    final_data = {}
    for source in (xml_data, out_data, xml_counter_data):
        final_data.update(source)

    # TODO: parse the trajectory and save them in a reasonable format

    return final_data, job_successful
# Example 5
def parse_raw_ph_output(stdout, tensors=None, dynamical_matrices=None):
    """Parses the raw output of a Quantum ESPRESSO `ph.x` calculation.

    :param stdout: the content of the stdout file as a string
    :param tensors: the content of the tensors.xml file as a string
    :param dynamical_matrices: a list of the content of the dynamical matrix files as a string
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    """
    logs = get_logging_container()
    data_lines = stdout.split('\n')
    parser_info = get_parser_info(parser_info_template='aiida-quantumespresso parser ph.x v{}')

    # If `JOB DONE` is missing, the job was interrupted before finishing
    if not any('JOB DONE' in line for line in data_lines):
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    # Parse tensors, if present
    tensor_data = {}
    if tensors:
        try:
            tensor_data = parse_ph_tensor(tensors)
        except QEOutputParsingError:
            logs.warning.append('Error while parsing the tensor files')

    out_data = parse_ph_text_output(data_lines, logs)

    # Parse dynamical matrices, if present
    dynmat_data = {}
    if dynamical_matrices:
        for dynmat_counter, dynmat in enumerate(dynamical_matrices):

            lines = dynmat.split('\n')

            if not lines:
                continue

            # The file is useful (contains frequencies) only when its first line is NOT
            # a plain row of numbers: if every token converts to float, skip the file.
            try:
                [float(token) for token in lines[0].split()]
            except ValueError:
                pass
            else:
                continue

            # Parse it and join it with the previous dynmat info, keyed by file index
            dynmat_data['dynamical_matrix_%s' % dynmat_counter] = parse_ph_dynmat(lines, logs)
            # TODO: use the bands format?

    # Join the dictionaries; no key should be repeated between them
    for key in out_data.keys():
        if key in tensor_data:
            raise AssertionError('{} found in two dictionaries'.format(key))
        if key in dynmat_data:
            raise AssertionError('{} found in two dictionaries'.format(key))

    # I don't check the dynmat_data and parser_info keys
    parsed_data = {}
    for source in (dynmat_data, out_data, tensor_data, parser_info):
        parsed_data.update(source)

    return parsed_data, logs
# Example 6
def parse_raw_output_neb(out_file, input_dict, parser_opts=None):
    """Parses the output of a neb calculation Receives in input the paths to the output file.

    :param out_file: path to neb std output
    :param input_dict: dictionary with the neb input parameters
    :param parser_opts: not used

    :return parameter_data: a dictionary with parsed parameters
    :return iteration_data: a dictionary with arrays (for relax & md calcs.)
    :return job_successful: a boolean that is False in case of failed calculations

    :raises QEOutputParsingError: for errors in the parsing,

    2 different keys to check in output: parser_warnings and warnings.
    On an upper level, these flags MUST be checked.
    The first is expected to be empty unless QE failures or unfinished jobs.
    """
    import copy

    job_successful = True
    parser_info = get_parser_info(parser_info_template='aiida-quantumespresso parser neb.x v{}')

    # Read the NEB output file; a missing file means the job crashed before producing output
    try:
        with open(out_file, 'r') as handle:
            out_lines = handle.read()
    except IOError:
        raise QEOutputParsingError('Failed to open output file: {}.'.format(out_file))

    # An existing but empty output file also indicates a crash
    if not out_lines:
        job_successful = False

    # Check whether the job has finished (that doesn't mean without errors); scan from
    # the end of the file since `JOB DONE` is printed last
    finished_run = any('JOB DONE' in line for line in out_lines.split('\n')[::-1])
    if not finished_run:
        parser_info['parser_warnings'].append('QE neb run did not reach the end of the execution.')
        job_successful = False

    # Parse the text output of the neb calculation
    try:
        out_data, iteration_data, critical_messages = parse_neb_text_output(out_lines, input_dict)
    except QEOutputParsingError as exc:
        if finished_run:
            # The run finished, so a parsing failure is a mistake of the parser itself
            raise QEOutputParsingError('Error while parsing NEB text output: {}'.format(exc))
        # Otherwise salvage as much as possible from the interrupted run
        parser_info['parser_warnings'].append('Error while parsing the output file')
        out_data = {'warnings': []}
        iteration_data = {}
        critical_messages = []

    # Promote the last element of every iteration array into out_data. The skip list
    # leaves the possibility to exclude some large arrays (None for the time being).
    skip_keys = []
    for key, values in copy.copy(iteration_data).items():
        if key not in skip_keys:
            out_data[key] = values[-1]

    # If any critical message is present, the calculation is FAILED
    if any(message in out_data['warnings'] for message in critical_messages):
        job_successful = False

    parameter_data = dict(list(out_data.items()) + list(parser_info.items()))

    # return various data.
    # parameter data will be mapped in Dict
    # iteration_data in ArrayData
    return parameter_data, iteration_data, job_successful
# Example 7
def parse_qe_simple(filecontent, codename=None):
    """
    Parses the output file of a QE calculation, just checking for basic content
    like JOB DONE, errors with %%%% etc.

    :param filecontent: a string with the output file content
    :param codename: the string printed both in the header and near the walltime.
        If passed, a few more things are parsed (e.g. code version, walltime, ...)
    :return: (successful, out_dict) where successful is a boolean (False is a critical error occurred);
        out_dict is a dictionary with parsed information (e.g. a list of warnings) that could e.g. be
        returned as a Dict by the parser.
    """
    # Assume success until evidence of a failure is found
    successful = True
    parsed_data = {'warnings': []}
    parsed_data.update(get_parser_info(parser_info_template='aiida-quantumespresso parser simple v{}'))

    generic_error_message = "There was an error, please check the 'error_message' key"

    if 'JOB DONE' not in filecontent:
        successful = False
        parsed_data['warnings'].append('Computation did not finish properly')

    lines = filecontent.split('\n')

    if codename is not None:
        codestring = 'Program {}'.format(codename)

        for count, line in enumerate(lines):

            if codestring in line and 'starts on' in line:
                parsed_data['code_version'] = line.split(codestring)[1].split('starts on')[0].strip()

            # parse the global file, for informations that are written only once
            if codename in line and 'WALL' in line:
                try:
                    time = line.split('CPU')[1].split('WALL')[0].strip()
                except (ValueError, IndexError):
                    parsed_data['warnings'].append('Error while parsing wall time.')
                else:
                    parsed_data['wall_time'] = time
                    try:
                        parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(time)
                    except ValueError:
                        raise QEOutputParsingError('Unable to convert wall_time in seconds.')

            if '%%%%%%%%%%%%%%' in line:
                successful = False
                if generic_error_message not in parsed_data['warnings']:
                    parsed_data['warnings'].append(generic_error_message)
                parsed_data.setdefault('error_message', [])
                # Pass count=0 to start from the top of the file (anyway, it's a short file)
                # pass an empty warnings list because we don't have existing warnings
                # (this is used to avoid duplication of errors)
                messages = parse_QE_errors(lines, count=count, warnings=parsed_data['error_message'])

                # if it found something, add to log
                if messages:
                    parsed_data['error_message'].extend(messages)

    return successful, parsed_data
def parse_raw_output(out_file,
                     input_dict,
                     parser_opts=None,
                     xml_file=None,
                     dir_with_bands=None):
    """
    Parses the output of a calculation
    Receives in input the paths to the output file and the xml file.

    :param out_file: path to pw std output
    :param input_dict: not used
    :param parser_opts: not used
    :param dir_with_bands: path to directory with all k-points (Kxxxxx) folders
    :param xml_file: path to QE data-file.xml

    :returns out_dict: a dictionary with parsed data
    :return successful: a boolean that is False in case of failed calculations

    :raises QEOutputParsingError: for errors in the parsing,
    :raises AssertionError: if two keys in the parsed dicts are found to be equal

    3 different keys to check in output: parser_warnings, xml_warnings and warnings.
    On an upper level, these flags MUST be checked.
    The first two are expected to be empty unless QE failures or unfinished jobs.
    """
    import copy
    # TODO: a lot of ifs could be cleaned out

    # TODO: input_dict should be used as well

    job_successful = True
    parser_info = get_parser_info(
        parser_info_template='aiida-quantumespresso basic parser pw.x v{}')

    # if xml_file is not given in input, skip its parsing
    if xml_file is not None:
        try:
            with open(xml_file, 'r') as f:
                xml_lines = f.read()  # Note: read() and not readlines()
        except IOError:
            raise QEOutputParsingError(
                "Failed to open xml file: {}.".format(xml_file))

        xml_data, structure_data = parse_pw_xml_output(xml_lines,
                                                       dir_with_bands)
        # Note the xml file should always be consistent.
    else:
        parser_info['parser_warnings'].append(
            'Skipping the parsing of the xml file.')
        xml_data = {}
        structure_data = {}

    # load QE out file
    try:
        with open(out_file, 'r') as f:
            out_lines = f.read()
    except IOError:  # non existing output file -> job crashed
        raise QEOutputParsingError(
            "Failed to open output file: {}.".format(out_file))

    if not out_lines:  # there is an output file, but it's empty -> crash
        job_successful = False

    # check if the job has finished (that doesn't mean without errors)
    finished_run = False
    for line in out_lines.split('\n')[::-1]:
        if 'JOB DONE' in line:
            finished_run = True
            break
    if not finished_run:  # error if the job has not finished
        warning = 'QE pw run did not reach the end of the execution.'
        parser_info['parser_warnings'].append(warning)
        job_successful = False

    # parse
    try:
        out_data, trajectory_data, critical_messages = parse_pw_text_output(
            out_lines, xml_data, structure_data, input_dict)
    except QEOutputParsingError:
        if not finished_run:  # I try to parse it as much as possible
            parser_info['parser_warnings'].append(
                'Error while parsing the output file')
            # Initialize with an empty warnings list for consistency with the neb
            # parser and so that the `warnings` lookups below cannot fail.
            out_data = {'warnings': []}
            trajectory_data = {}
            critical_messages = []
        else:  # if it was finished and I got error, it's a mistake of the parser
            raise QEOutputParsingError('Error while parsing QE output')

    # I add in the out_data all the last elements of trajectory_data values.
    # Safe for some large arrays, that I will likely never query.
    skip_keys = [
        'forces', 'lattice_vectors_relax', 'atomic_positions_relax',
        'atomic_species_name'
    ]
    # `iteritems` was removed in Python 3; iterate a shallow copy since keys
    # may be popped from trajectory_data inside the loop.
    for key, values in copy.copy(trajectory_data).items():
        if key in skip_keys:
            continue
        out_data[key] = values[-1]
        if len(values) == 1:  # delete eventual keys that are not arrays (scf cycles)
            trajectory_data.pop(key)
            # note: if an array is empty, there will be IndexError
    for key in ['k_points', 'k_points_weights']:
        try:
            trajectory_data[key] = xml_data.pop(key)
        except KeyError:
            pass
    # As the k points are an array that is rather large, and again it's not something I'm going to parse likely
    # since it's an info mainly contained in the input file, I move it to the trajectory data

    # if there is a severe error, the calculation is FAILED
    if any(message in out_data['warnings'] for message in critical_messages):
        job_successful = False

    # Iterate over a snapshot of the keys: in Python 3 deleting from a dict while
    # iterating its live keys view raises a RuntimeError.
    for key in list(out_data.keys()):
        if key in xml_data:
            if key == 'fermi_energy' or key == 'fermi_energy_units':  # an exception for the (only?) key that may be found on both
                del out_data[key]
            else:
                raise AssertionError(
                    '{} found in both dictionaries, '
                    'values: {} vs. {}'.format(
                        key, out_data[key],
                        xml_data[key]))  # this shouldn't happen!
                # out_data keys take precedence and overwrite xml_data keys,
                # if the same key name is shared by both
                # dictionaries (but this should not happen!)

    # `dict.items()` views cannot be concatenated with `+` in Python 3; merge with
    # successive updates keeping the original precedence (out_data overrides
    # xml_data, parser_info overrides both).
    parameter_data = dict(xml_data)
    parameter_data.update(out_data)
    parameter_data.update(parser_info)

    # return various data.
    # parameter data will be mapped in ParameterData
    # trajectory_data in ArrayData
    # structure_data in a Structure
    # bands_data should probably be merged in ArrayData
    return parameter_data, trajectory_data, structure_data, job_successful