def create_trajectory_data():
    """Build and store a two-frame ``TrajectoryData`` node and two groups.

    The trajectory has three sites (H, O, C) and two steps (ids 60 and 70).
    It is added to the stored group labelled ``non_empty_group``; a second
    stored group, ``empty_group``, receives no nodes.

    :return: dict keyed by the ``DummyVerdiDataListable`` constants holding
        the id of the trajectory node, of the non-empty group and of the
        empty group.
    """
    node = TrajectoryData()

    # Sample data: both frames share the same positions, only the cell and
    # the velocities change from the first frame to the second.
    step_numbers = np.array([60, 70])
    frame_positions = [[0., 0., 0.], [0.5, 0.5, 0.5], [1.5, 1.5, 1.5]]
    sample = {
        'stepids': step_numbers,
        'cells': np.array([edge * np.eye(3) for edge in (2., 3.)]),
        'symbols': ['H', 'O', 'C'],
        'positions': np.array([frame_positions, frame_positions]),
        'times': step_numbers * 0.01,
        'velocities': np.array([
            [[0., 0., 0.]] * 3,
            [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [-0.5, -0.5, -0.5]],
        ]),
    }
    node.set_trajectory(**sample)
    node.store()

    # Two groups: the first one contains the trajectory, the second stays empty.
    filled_group = Group(label='non_empty_group')
    filled_group.store()
    filled_group.add_nodes(node)

    blank_group = Group(label='empty_group')
    blank_group.store()

    return {
        DummyVerdiDataListable.NODE_ID_STR: node.id,
        DummyVerdiDataListable.NON_EMPTY_GROUP_ID_STR: filled_group.id,
        DummyVerdiDataListable.EMPTY_GROUP_ID_STR: blank_group.id,
    }
def parse(self, **kwargs):
    """Parse the retrieved files of a completed `NebCalculation` into output nodes.

    The method reads the NEB stdout from the `retrieved` folder, then, for every image, the XML file and the
    `PW.out` file found under `<PREFIX>_<image index>/`, and finally the `<PREFIX>.dat` and `<PREFIX>.int`
    energy profiles. It registers the following outputs:

    * `output_parameters`: `Dict` merging the parsed NEB data with one `pw_output_image_<n>` entry per image;
    * `output_trajectory`: `TrajectoryData` with one frame (cell and positions) per image;
    * `iteration_array`: `ArrayData` with the per-iteration data, only if the parser option `all_iterations`
      is set and iteration data was parsed;
    * `output_mep`: `ArrayData` with the arrays `mep` and `interpolated_mep`.

    NOTE(review): an earlier version of this docstring mentioned a `retrieved_temporary_files` input; nothing in
    this method reads `kwargs`, so that is presumably outdated -- confirm.

    :param kwargs: not used by this method.
    :return: the result of `self.exit(...)` with the relevant exit code as soon as a required file is missing
        or cannot be parsed, `None` otherwise.
    """
    from aiida.orm import TrajectoryData, ArrayData
    import os
    import numpy

    PREFIX = self.node.process_class._PREFIX

    # Check that the retrieved folder is there
    try:
        out_folder = self.retrieved
    except NotExistent:
        return self.exit(self.exit_codes.ERROR_NO_RETRIEVED_FOLDER)

    list_of_files = out_folder.list_object_names()  # Note: this includes folders, but not the files they contain.

    # The stdout is required for parsing
    filename_stdout = self.node.get_attribute('output_filename')

    if filename_stdout not in list_of_files:
        return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

    # Look for optional settings input node and potential 'parser_options' dictionary within it
    # Note that we look for both NEB and PW parser options under "inputs.settings.parser_options";
    # we don't even have a namespace "inputs.pw.settings".
    try:
        settings = self.node.inputs.settings.get_dict()
        parser_options = settings[self.get_parser_settings_key()]
    except (AttributeError, KeyError, NotExistent):
        # No settings node, or no parser options in it: fall back to empty dictionaries
        settings = {}
        parser_options = {}

    # load the pw input parameters dictionary
    pw_input_dict = self.node.inputs.pw__parameters.get_dict()

    # load the neb input parameters dictionary
    neb_input_dict = self.node.inputs.parameters.get_dict()

    # First parse the Neb output
    try:
        stdout = out_folder.get_object_content(filename_stdout)
        neb_out_dict, iteration_data, raw_successful = parse_raw_output_neb(stdout, neb_input_dict)
        # TODO: why do we ignore raw_successful ?
    except (OSError, QEOutputParsingError):
        return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

    # Forward every warning collected by the raw NEB parser to the logger
    for warn_type in ['warnings', 'parser_warnings']:
        for message in neb_out_dict[warn_type]:
            self.logger.warning('parsing NEB output: {}'.format(message))

    # The raw parser signals a truncated stdout through this exact warning message
    if 'QE neb run did not reach the end of the execution.' in neb_out_dict['parser_warnings']:
        return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE)

    # Retrieve the number of images: the input parameters take precedence over the parsed output
    try:
        num_images = neb_input_dict['num_of_images']
    except KeyError:
        try:
            num_images = neb_out_dict['num_of_images']
        except KeyError:
            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_PARSE)
    if num_images < 2:
        return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_PARSE)

    # Now parse the information from the individual pw calculations for the different images
    image_data = {}
    positions = []
    cells = []
    # for each image...
    for i in range(num_images):
        # check if any of the known XML output file names are present, and parse the first that we find
        relative_output_folder = os.path.join('{}_{}'.format(PREFIX, i + 1), '{}.save'.format(PREFIX))
        retrieved_files = self.retrieved.list_object_names(relative_output_folder)
        for xml_filename in PwCalculation.xml_filenames:
            if xml_filename in retrieved_files:
                xml_file_path = os.path.join(relative_output_folder, xml_filename)
                try:
                    with out_folder.open(xml_file_path) as xml_file:
                        parsed_data_xml, logs_xml = parse_pw_xml(xml_file, None)
                except IOError:
                    return self.exit(self.exit_codes.ERROR_OUTPUT_XML_READ)
                except XMLParseError:
                    return self.exit(self.exit_codes.ERROR_OUTPUT_XML_PARSE)
                except XMLUnsupportedFormatError:
                    return self.exit(self.exit_codes.ERROR_OUTPUT_XML_FORMAT)
                except Exception:
                    # Anything else is unexpected: print the traceback before returning the generic exit code
                    import traceback
                    traceback.print_exc()
                    return self.exit(self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION)
                # this image is dealt with, so break the inner loop and go to the next image
                break
        # otherwise, if none of the filenames we tried exists, exit with an error
        else:
            return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE)

        # look for pw output and parse it
        pw_out_file = os.path.join('{}_{}'.format(PREFIX, i + 1), 'PW.out')
        try:
            with out_folder.open(pw_out_file, 'r') as f:
                pw_out_text = f.read()  # Note: read() and not readlines()
        except IOError:
            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

        try:
            parsed_data_stdout, logs_stdout = parse_pw_stdout(
                pw_out_text, pw_input_dict, parser_options, parsed_data_xml)
        except Exception:
            return self.exit(self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION)

        # Structure and trajectory are handled separately from the plain output parameters
        parsed_structure = parsed_data_stdout.pop('structure', {})
        parsed_trajectory = parsed_data_stdout.pop('trajectory', {})

        parsed_parameters = PwParser.build_output_parameters(parsed_data_xml, parsed_data_stdout)

        # Explicit information about k-points does not need to be queryable so we remove it from the parameters
        parsed_parameters.pop('k_points', None)
        parsed_parameters.pop('k_points_units', None)
        parsed_parameters.pop('k_points_weights', None)

        # Delete bands
        # TODO: this is just to make pytest happy; do we want to keep them instead?
        parsed_parameters.pop('bands', None)

        # Append the last frame of some of the smaller trajectory arrays to the parameters for easy querying
        PwParser.final_trajectory_frame_to_parameters(parsed_parameters, parsed_trajectory)

        # If the parser option 'all_symmetries' is False, we reduce the raw parsed symmetries to save space
        all_symmetries = False if parser_options is None else parser_options.get('all_symmetries', False)
        if not all_symmetries and 'cell' in parsed_structure:
            reduce_symmetries(parsed_parameters, parsed_structure, self.logger)

        structure_data = convert_qe2aiida_structure(parsed_structure)

        # Store the parameters of this image under a 1-based key
        key = 'pw_output_image_{}'.format(i + 1)
        image_data[key] = parsed_parameters

        positions.append([site.position for site in structure_data.sites])
        cells.append(structure_data.cell)

        # Add also PW warnings and errors to the neb output data, avoiding repetitions.
        for log_type in ['warning', 'error']:
            for message in logs_stdout[log_type]:
                formatted_message = '{}: {}'.format(log_type, message)
                if formatted_message not in neb_out_dict['warnings']:
                    neb_out_dict['warnings'].append(formatted_message)

    # Symbols can be obtained simply from the last image
    # (`structure_data` still refers to the structure built in the last loop iteration)
    symbols = [str(site.kind_name) for site in structure_data.sites]

    output_params = Dict(dict=dict(list(neb_out_dict.items()) + list(image_data.items())))
    self.out('output_parameters', output_params)

    # One trajectory frame per image, with step ids running from 1 to `num_images`
    trajectory = TrajectoryData()
    trajectory.set_trajectory(
        stepids=numpy.arange(1, num_images + 1),
        cells=numpy.array(cells),
        symbols=symbols,
        positions=numpy.array(positions),
    )
    self.out('output_trajectory', trajectory)

    # The per-iteration arrays are only stored when explicitly requested through the parser options
    if parser_options is not None and parser_options.get('all_iterations', False):
        if iteration_data:
            arraydata = ArrayData()
            for k, v in iteration_data.items():
                arraydata.set_array(k, numpy.array(v))
            self.out('iteration_array', arraydata)

    # Load the original and interpolated energy profile along the minimum-energy path (mep)
    # Any failure is only logged as a warning and replaced by an empty 2D array
    try:
        filename = PREFIX + '.dat'
        with out_folder.open(filename, 'r') as handle:
            mep = numpy.loadtxt(handle)
    except Exception:
        self.logger.warning('could not open expected output file `{}`.'.format(filename))
        mep = numpy.array([[]])

    try:
        filename = PREFIX + '.int'
        with out_folder.open(filename, 'r') as handle:
            interp_mep = numpy.loadtxt(handle)
    except Exception:
        self.logger.warning('could not open expected output file `{}`.'.format(filename))
        interp_mep = numpy.array([[]])

    # Create an ArrayData with the energy profiles
    mep_arraydata = ArrayData()
    mep_arraydata.set_array('mep', mep)
    mep_arraydata.set_array('interpolated_mep', interp_mep)
    self.out('output_mep', mep_arraydata)

    return
def parse(self, **kwargs):
    """Parse the retrieved files of a `cp.x` calculation into output nodes.

    Always registers ``output_parameters`` (a ``Dict`` built from the parsed stdout and XML). When a print
    counter file was retrieved, it also registers ``output_trajectory`` (a ``TrajectoryData`` built from the
    ``.pos``, ``.cel``, ``.vel``, ``.for`` and ``.evp`` files). Trajectory units are Angstrom, picoseconds
    and eV.

    :param kwargs: not used by this method.
    :return: an exit code when a required file is missing or when the times of the ``.evp`` file disagree
        with those of the ``.pos`` file; ``None`` otherwise.
    """
    retrieved = self.retrieved

    # check what is inside the folder (public API, same call as used by the other parsers)
    list_of_files = retrieved.list_object_names()

    # options.metadata become attributes like this:
    stdout_filename = self.node.get_attribute('output_filename')
    # at least the stdout should exist
    if stdout_filename not in list_of_files:
        return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

    # This should match 1 file
    xml_files = [xml_file for xml_file in self.node.process_class.xml_filenames if xml_file in list_of_files]
    if not xml_files:
        return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE)
    elif len(xml_files) > 1:
        return self.exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE)

    # cp.x can produce, depending on the particular version of the code, a file called `print_counter.xml` or
    # `print_counter`, which is a plain text file with the number of the last timestep written in the trajectory
    # output. Note that if no trajectory is produced (for example because a single conjugate gradient step was
    # performed to calculate the ground state and the wavefunctions velocities) no printer_counter* file is written.
    print_counter_xml = True
    no_trajectory_output = False

    filename_counter_txt = self.node.process_class._FILE_PRINT_COUNTER_BASENAME
    filename_counter_xml = self.node.process_class._FILE_XML_PRINT_COUNTER_BASENAME

    # The following can happen and is not an error!
    # NOTE(review): the message is nevertheless logged at error level -- confirm whether `warning` is intended.
    if filename_counter_xml not in list_of_files and filename_counter_txt not in list_of_files:
        self.logger.error(
            f'We could not find the print counter file (`{filename_counter_txt}` or `{filename_counter_xml}`), '
            'assuming no trajectory output was produced'
        )
        no_trajectory_output = True

    if not no_trajectory_output:
        if filename_counter_txt in list_of_files:
            self.logger.info('print counter not in xml format')
            print_counter_xml = False
            filename_counter = filename_counter_txt
        else:  # xml format
            print_counter_xml = True
            self.logger.info('print counter in xml format')
            filename_counter = filename_counter_xml

    output_stdout = retrieved.get_object_content(stdout_filename)
    output_xml = retrieved.get_object_content(xml_files[0])
    output_xml_counter = None if no_trajectory_output else retrieved.get_object_content(filename_counter)
    out_dict, _raw_successful = parse_cp_raw_output(
        output_stdout, output_xml, output_xml_counter, print_counter_xml
    )

    if not no_trajectory_output:
        # parse the trajectory. Units in Angstrom, picoseconds and eV.
        # append everything in the temporary dictionary raw_trajectory
        raw_trajectory = {}
        prefix = self.node.process_class._PREFIX

        # Quantities of the .evp file, in column order, with the factor converting them to the output
        # units (`None` means that the column is stored as it is).
        evp_columns = [
            ('electronic_kinetic_energy', CONSTANTS.hartree_to_ev),  # EKINC, eV
            ('cell_temperature', None),  # TEMPH, K
            ('ionic_temperature', None),  # TEMPP, K
            ('scf_total_energy', CONSTANTS.hartree_to_ev),  # ETOT, eV
            ('enthalpy', CONSTANTS.hartree_to_ev),  # ENTHAL, eV
            ('enthalpy_plus_kinetic', CONSTANTS.hartree_to_ev),  # ECONS, eV
            ('energy_constant_motion', CONSTANTS.hartree_to_ev),  # ECONT, eV
            ('volume', CONSTANTS.bohr_to_ang**3),  # volume, angstrom^3
            ('pressure', None),  # out_press, GPa
        ]
        evp_keys = [key for key, _ in evp_columns]

        # order of atom in the output trajectory changed somewhere after 6.5
        new_cp_ordering = LooseVersion(out_dict['creator_version']) > LooseVersion('6.5')

        # Now prepare the reordering, as files in the xml are ordered
        if new_cp_ordering:
            reordering = None
        else:
            try:
                # this works for old xml only
                reordering = self._generate_sites_ordering(out_dict['species'], out_dict['atoms'])
            except KeyError:
                # this works for newer versions
                reordering = self._generate_sites_ordering(
                    out_dict['structure']['species'], out_dict['structure']['atoms']
                )

        pos_filename = f'{prefix}.pos'
        if pos_filename not in list_of_files:
            out_dict['warnings'].append('Unable to open the POS file... skipping.')
            return self.exit_codes.ERROR_READING_POS_FILE

        number_of_atoms = out_dict.get(
            'number_of_atoms', out_dict['structure']['number_of_atoms'] if 'structure' in out_dict else None
        )

        # (array name, file extension, conversion factor, number of lines per stanza)
        trajectories = [
            ('positions', 'pos', CONSTANTS.bohr_to_ang, number_of_atoms),
            ('cells', 'cel', CONSTANTS.bohr_to_ang, 3),
            ('velocities', 'vel', CONSTANTS.bohr_to_ang / (CONSTANTS.timeau_to_sec * 10**12), number_of_atoms),
            ('forces', 'for', CONSTANTS.hartree_to_ev / CONSTANTS.bohr_to_ang, number_of_atoms),
        ]

        for name, extension, scale, elements in trajectories:
            try:
                with retrieved.open(f'{prefix}.{extension}') as datafile:
                    data = [line.split() for line in datafile]
                    # POSITIONS stored in angstrom
                    traj_data = parse_cp_traj_stanzas(
                        num_elements=elements, splitlines=data, prepend_name=f'{name}_traj', rescale=scale
                    )
                # here initialize the dictionary. If the parsing of positions fails, though, I don't have
                # anything out of the CP dynamics. Therefore, the calculation status is set to FAILED.
                if extension != 'cel':
                    raw_trajectory[f'{name}_ordered'] = self._get_reordered_array(
                        traj_data[f'{name}_traj_data'], reordering
                    )
                else:
                    # NOTE: the trajectory output has the cell matrix transposed!!
                    raw_trajectory['cells'] = numpy.array(traj_data['cells_traj_data']).transpose((0, 2, 1))
                if extension == 'pos':
                    raw_trajectory['traj_times'] = numpy.array(traj_data[f'{name}_traj_times'])
            except IOError:
                out_dict['warnings'].append(f'Unable to open the {extension.upper()} file... skipping.')

        # =============== EVP trajectory ============================
        try:
            with retrieved.open(f'{prefix}.evp') as handle:
                # genfromtxt returns a 1D array if the file has one row only: always work on a 2D array
                matrix = numpy.atleast_2d(numpy.genfromtxt(handle))

            # Between version 5.1 and 5.1.1, someone decided to change
            # the .evp output format, without any way to know that this
            # happened... SVN commit 11158.
            # I here use the version number to parse, plus some
            # heuristics to check that I'm doing the right thing.
            # New format: the time (TPS, ps) is the second column, before the other quantities.
            # Old format: the time is the last column, after the other quantities.
            if LooseVersion(out_dict['creator_version']) > LooseVersion('5.1'):
                times_column, first_value_column = 1, 2
            else:
                times_column, first_value_column = 10, 1

            raw_trajectory['steps'] = numpy.array(matrix[:, 0], dtype=int)
            raw_trajectory['times'] = matrix[:, times_column]  # TPS, ps
            for offset, (key, factor) in enumerate(evp_columns):
                column = matrix[:, first_value_column + offset]
                raw_trajectory[key] = column if factor is None else column * factor

            # Heuristics to understand if it's correct.
            # A better heuristics could also try to fix possible issues
            # (in new versions of QE, it's possible to recompile it with
            # the __OLD_FORMAT flag to get back the old version format...)
            # but I won't do it, as there may be also other columns swapped.
            # Better to stop and ask the user to check what's going on.

            # work around for 100ps format bug: frames with a negative printed time are not compared
            traj_times = numpy.array(raw_trajectory['traj_times'])
            evp_times = numpy.array(raw_trajectory['times'])
            mask = traj_times >= 0
            bugged_times = evp_times[~mask]
            valid_times = evp_times[mask]
            len_bugged = len(bugged_times)

            if len(valid_times) > 0:
                max_time_difference = abs(valid_times - traj_times[mask]).max()
            else:
                max_time_difference = 0.0

            if max_time_difference > 1.e-4 or (len_bugged > 0 and bugged_times.min() < 100.0):
                # It is typically ~1.e-7 due to roundoff errors
                # If there is a large discrepancy
                # it means there is something very weird going on...
                return self.exit_codes.ERROR_READING_TRAJECTORY_DATA

            # keep both times array (that usually are duplicated)
            # so that the user can check them by himselves
            if len_bugged > 0:
                out_dict['warnings'].append(
                    '100ps format bug detected: ignoring trajectory\'s printed time from 100ps on'
                )
        except IOError:
            out_dict['warnings'].append('Unable to open the EVP file... skipping.')

        # get the symbols from the input
        # TODO: I should have kinds in TrajectoryData
        input_structure = self.node.inputs.structure
        raw_trajectory['symbols'] = [str(site.kind_name) for site in input_structure.sites]

        traj = TrajectoryData()
        traj.set_trajectory(
            stepids=raw_trajectory['steps'],
            cells=raw_trajectory['cells'],
            symbols=raw_trajectory['symbols'],
            positions=raw_trajectory['positions_ordered'],
            times=raw_trajectory['times'],
            velocities=raw_trajectory['velocities_ordered'],
        )

        # eventually set the forces
        try:
            traj.set_array('forces', raw_trajectory['forces_ordered'])
        except KeyError:
            out_dict['warnings'].append('failed to set forces')

        for this_name in evp_keys:
            try:
                traj.set_array(this_name, raw_trajectory[this_name])
            except KeyError:
                # Some columns may have not been parsed, skip
                pass

        self.out('output_trajectory', traj)

    # Remove big dictionaries that would be redundant
    # For atoms and cell, there is a small possibility that nothing is parsed but then probably nothing moved.
    for key in [
        'atoms', 'cell', 'ions_positions_stau', 'ions_positions_svel', 'ions_positions_taui', 'atoms_index_list',
        'atoms_if_pos_list', 'ions_positions_force', 'bands', 'structure'
    ]:
        out_dict.pop(key, None)

    # convert the dictionary into an AiiDA object
    output_params = Dict(dict=out_dict)
    self.out('output_parameters', output_params)
def parse(self, **kwargs):
    """Parse the retrieved files of a `cp.x` calculation into output nodes.

    Registers ``output_trajectory`` (a ``TrajectoryData`` built from the ``.pos``, ``.cel``, ``.vel`` and
    ``.evp`` files) and ``output_parameters`` (a ``Dict`` built from the parsed stdout and XML). Trajectory
    units are Angstrom, picoseconds and eV.

    :param kwargs: not used by this method.
    :return: the result of ``self.exit(...)`` with the relevant exit code when a required file is missing or
        when the times of the ``.evp`` file disagree with those of the ``.pos`` file; ``None`` otherwise.
    """
    try:
        out_folder = self.retrieved
    except NotExistent:
        return self.exit(self.exit_codes.ERROR_NO_RETRIEVED_FOLDER)

    # check what is inside the folder (public API, same call as used by the other parsers)
    list_of_files = out_folder.list_object_names()

    # options.metadata become attributes like this:
    stdout_filename = self.node.get_attribute('output_filename')
    # at least the stdout should exist
    if stdout_filename not in list_of_files:
        return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

    # This should match 1 file
    xml_files = [xml_file for xml_file in self.node.process_class.xml_filenames if xml_file in list_of_files]
    if not xml_files:
        return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE)
    elif len(xml_files) > 1:
        return self.exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE)

    counter_filename = self.node.process_class._FILE_XML_PRINT_COUNTER_BASENAME
    if counter_filename not in list_of_files:
        self.logger.error('We could not find the print counter file in the output')
        # TODO: Add an error for this counter
        return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE)

    output_stdout = out_folder.get_object_content(stdout_filename)
    output_xml = out_folder.get_object_content(xml_files[0])
    output_xml_counter = out_folder.get_object_content(counter_filename)
    out_dict, _raw_successful = parse_cp_raw_output(output_stdout, output_xml, output_xml_counter)

    # parse the trajectory. Units in Angstrom, picoseconds and eV.
    # append everything in the temporary dictionary raw_trajectory
    raw_trajectory = {}
    prefix = self.node.process_class._PREFIX

    # Quantities of the .evp file, in column order, with the factor converting them to the output
    # units (`None` means that the column is stored as it is).
    evp_columns = [
        ('electronic_kinetic_energy', CONSTANTS.hartree_to_ev),  # EKINC, eV
        ('cell_temperature', None),  # TEMPH, K
        ('ionic_temperature', None),  # TEMPP, K
        ('scf_total_energy', CONSTANTS.hartree_to_ev),  # ETOT, eV
        ('enthalpy', CONSTANTS.hartree_to_ev),  # ENTHAL, eV
        ('enthalpy_plus_kinetic', CONSTANTS.hartree_to_ev),  # ECONS, eV
        ('energy_constant_motion', CONSTANTS.hartree_to_ev),  # ECONT, eV
        ('volume', CONSTANTS.bohr_to_ang**3),  # volume, angstrom^3
        ('pressure', None),  # out_press, GPa
    ]
    evp_keys = [key for key, _ in evp_columns]

    # Now prepare the reordering, as files in the xml are ordered
    reordering = self._generate_sites_ordering(out_dict['species'], out_dict['atoms'])

    pos_filename = '{}.{}'.format(prefix, 'pos')
    if pos_filename not in list_of_files:
        return self.exit(self.exit_codes.ERROR_READING_POS_FILE)

    # (array name, file extension, conversion factor, number of lines per stanza)
    # The velocity factor converts bohr / (atomic unit of time) into angstrom / ps, hence the time unit in
    # picoseconds (`timeau_to_sec * 10**12`) has to be, as a whole, in the denominator.
    trajectories = [
        ('positions', 'pos', CONSTANTS.bohr_to_ang, out_dict['number_of_atoms']),
        ('cells', 'cel', CONSTANTS.bohr_to_ang, 3),
        (
            'velocities', 'vel', CONSTANTS.bohr_to_ang / (CONSTANTS.timeau_to_sec * 10**12),
            out_dict['number_of_atoms']
        ),
    ]

    for name, extension, scale, elements in trajectories:
        try:
            with out_folder.open('{}.{}'.format(prefix, extension)) as datafile:
                data = [line.split() for line in datafile]
                # POSITIONS stored in angstrom
                traj_data = parse_cp_traj_stanzas(
                    num_elements=elements, splitlines=data, prepend_name='{}_traj'.format(name), rescale=scale
                )
            # here initialize the dictionary. If the parsing of positions fails, though, I don't have anything
            # out of the CP dynamics. Therefore, the calculation status is set to FAILED.
            if extension != 'cel':
                raw_trajectory['{}_ordered'.format(name)] = self._get_reordered_array(
                    traj_data['{}_traj_data'.format(name)], reordering
                )
            else:
                # NOTE(review): the cell matrices are stored as read from the file, without any
                # transposition -- confirm that this is the intended orientation.
                raw_trajectory['cells'] = numpy.array(traj_data['cells_traj_data'])
            if extension == 'pos':
                raw_trajectory['times'] = numpy.array(traj_data['{}_traj_times'.format(name)])
        except IOError:
            out_dict['warnings'].append('Unable to open the {} file... skipping.'.format(extension.upper()))

    # =============== EVP trajectory ============================
    try:
        with out_folder.open('{}.evp'.format(prefix)) as handle:
            # genfromtxt returns a 1D array if the file has one row only: always work on a 2D array
            matrix = numpy.atleast_2d(numpy.genfromtxt(handle))

        # Between version 5.1 and 5.1.1, someone decided to change
        # the .evp output format, without any way to know that this
        # happened... SVN commit 11158.
        # I here use the version number to parse, plus some
        # heuristics to check that I'm doing the right thing.
        # New format: the time (TPS, ps) is the second column, before the other quantities.
        # Old format: the time is the last column, after the other quantities.
        if LooseVersion(out_dict['creator_version']) > LooseVersion('5.1'):
            times_column, first_value_column = 1, 2
        else:
            times_column, first_value_column = 10, 1

        raw_trajectory['steps'] = numpy.array(matrix[:, 0], dtype=int)
        evp_times = matrix[:, times_column]  # TPS, ps
        for offset, (key, factor) in enumerate(evp_columns):
            column = matrix[:, first_value_column + offset]
            raw_trajectory[key] = column if factor is None else column * factor

        # Heuristics to understand if it's correct.
        # A better heuristics could also try to fix possible issues
        # (in new versions of QE, it's possible to recompile it with
        # the __OLD_FORMAT flag to get back the old version format...)
        # but I won't do it, as there may be also other columns swapped.
        # Better to stop and ask the user to check what's going on.
        # The times of the .evp file are only used for this check: 'times' (from the .pos file) is what
        # ends up in the trajectory.
        max_time_difference = abs(numpy.array(raw_trajectory['times']) - numpy.array(evp_times)).max()
        if max_time_difference > 1.e-4:  # It is typically ~1.e-7 due to roundoff errors
            # If there is a large discrepancy
            # it means there is something very weird going on...
            return self.exit(self.exit_codes.ERROR_READING_TRAJECTORY_DATA)
    except IOError:
        out_dict['warnings'].append('Unable to open the EVP file... skipping.')

    # get the symbols from the input
    # TODO: I should have kinds in TrajectoryData
    input_structure = self.node.inputs.structure
    raw_trajectory['symbols'] = [str(site.kind_name) for site in input_structure.sites]

    traj = TrajectoryData()
    traj.set_trajectory(
        stepids=raw_trajectory['steps'],
        cells=raw_trajectory['cells'],
        symbols=raw_trajectory['symbols'],
        positions=raw_trajectory['positions_ordered'],
        times=raw_trajectory['times'],
        velocities=raw_trajectory['velocities_ordered'],
    )

    for this_name in evp_keys:
        try:
            traj.set_array(this_name, raw_trajectory[this_name])
        except KeyError:
            # Some columns may have not been parsed, skip
            pass

    self.out('output_trajectory', traj)

    # Remove big dictionaries that would be redundant
    # For atoms and cell, there is a small possibility that nothing is parsed
    # but then probably nothing moved.
    for key in [
        'atoms', 'cell', 'ions_positions_stau', 'ions_positions_svel', 'ions_positions_taui', 'atoms_index_list',
        'atoms_if_pos_list', 'ions_positions_force'
    ]:
        out_dict.pop(key, None)

    # convert the dictionary into an AiiDA object
    output_params = Dict(dict=out_dict)
    self.out('output_parameters', output_params)
def parse(self, **kwargs):
    """Parse the LAMMPS output files and register the resulting nodes.

    Registers ``results`` (a ``Dict`` with the data parsed from the log file),
    ``trajectory_data`` (a ``TrajectoryData``, with an extra ``charges`` array
    when charges were read) and, if a system info file is available,
    ``system_data`` (an ``ArrayData`` with one array per column of that file).

    :param kwargs: forwarded to ``get_parsing_resources``.
    :return: an exit code if resources are missing, if a file cannot be parsed
        or if the log data reports errors; ``None`` otherwise.
    """
    # locate the files to parse
    resources, exit_code = self.get_parsing_resources(
        kwargs, traj_in_temp=True, sys_info=True)
    if exit_code is not None:
        return exit_code
    _, traj_path, info_path = resources

    # log file
    log_data, exit_code = self.parse_log_file()
    if exit_code is not None:
        return exit_code

    # trajectory file
    try:
        parsed = read_lammps_trajectory(
            traj_path,
            timestep=self.node.inputs.parameters.dict.timestep,
            log_warning_func=self.logger.warning)
        positions, charges, step_ids, cells, symbols, time = parsed
    except Exception:
        traceback.print_exc()
        return self.exit_codes.ERROR_TRAJ_PARSING

    # results node
    output_data = log_data["data"]
    if 'units_style' not in output_data:
        self.logger.warning("units missing in log")
    else:
        units = get_units_dict(output_data['units_style'],
                               ["distance", "time", "energy"])
        output_data.update(units)
    self.add_warnings_and_errors(output_data)
    self.add_standard_info(output_data)
    self.out('results', Dict(dict=output_data))

    # trajectory node
    traj_node = TrajectoryData()
    traj_node.set_trajectory(
        symbols, positions, stepids=step_ids, cells=cells, times=time)
    if charges is not None:
        traj_node.set_array('charges', charges)
    self.out('trajectory_data', traj_node)

    # system data node: the first line of the file holds the column names
    if info_path:
        info_node = ArrayData()
        try:
            with open(info_path) as handle:
                header = handle.readline().strip().split()
            columns = np.loadtxt(info_path, skiprows=1, unpack=True)
            for index, values in enumerate(columns):
                info_node.set_array(header[index], values)
        except Exception:
            traceback.print_exc()
            return self.exit_codes.ERROR_INFO_PARSING
        info_node.set_attribute('units_style',
                                output_data.get('units_style'))
        self.out('system_data', info_node)

    if output_data["errors"]:
        return self.exit_codes.ERROR_LAMMPS_RUN
    return None
def parse(self, **kwargs):
    """Parse the retrieved CASTEP files and register the output nodes.

    Top level logic of the parser: the content of the retrieved files is
    handed to ``RawParser`` and the returned data is converted into nodes.
    Depending on what was found, the method registers the bands, the output
    structure, a trajectory (or a plain array node when there is no
    ``.geom``/``.md`` file) and, always, the results dictionary. The labels of
    the outputs are taken from ``out_ln``.

    :param kwargs: only ``retrieved_temporary_folder`` is read; it is passed
        to ``RetrievedFileManager``.
    :return: ``ERROR_NO_RETRIEVED_FOLDER`` or ``ERROR_NO_OUTPUT_FILE`` if the
        mandatory files are missing, otherwise the exit code named by the
        combined result of the raw parser and the ``.err`` file check.
    """
    try:
        retrieved = self.retrieved
    except exceptions.NotExistent:
        return self.exit_codes.ERROR_NO_RETRIEVED_FOLDER

    output_folder = RetrievedFileManager(
        retrieved, kwargs.get('retrieved_temporary_folder'))

    warnings = []
    exit_code_1 = None

    # NOTE parser options not used for now
    parser_opts = {}

    # NOT READILY IN USE
    input_dict = {}

    # check what is inside the folder
    filenames = output_folder.list_object_names()

    # Get calculation options
    options = self.node.get_options()
    seedname = options['seedname']

    # at least the stdout should exist
    if options['output_filename'] not in filenames:
        self.logger.error("Standard output not found")
        return self.exit_codes.ERROR_NO_OUTPUT_FILE

    # The calculation is failed if there is any err file.
    err_filenames = [fname for fname in filenames if '.err' in fname]
    if err_filenames:
        exit_code_1 = 'ERROR_CASTEP_ERROR'

    # Add the content of err files
    err_contents = {
        output_folder.get_object_content(fname)
        for fname in err_filenames
    }

    # Trajectory files: the first of `.geom` / `.md` that exists is used
    has_md_geom = False
    out_md_geom_name_content = None
    for suffix in ('.geom', '.md'):
        fname = seedname + suffix
        if fname in filenames:
            out_md_geom_name_content = (
                fname, output_folder.get_object_content(fname).split('\n'))
            has_md_geom = True
            break

    # Handling bands
    fname = seedname + '.bands'
    has_bands = fname in filenames
    if has_bands:
        out_bands_content = output_folder.get_object_content(fname).split(
            '\n')
    else:
        out_bands_content = None

    out_file = options['output_filename']
    out_file_content = output_folder.get_object_content(out_file).split('\n')

    ###### CALL THE RAW PASSING FUNCTION TO PARSE DATA #######

    raw_parser = RawParser(out_lines=out_file_content,
                           input_dict=input_dict,
                           md_geom_info=out_md_geom_name_content,
                           bands_lines=out_bands_content,
                           **parser_opts)
    out_dict, trajectory_data, structure_data, bands_data, exit_code_2\
        = raw_parser.parse()

    # Combine the exit codes use the more specific error:
    # the first entry of `calc_exit_code` matching one of the two codes wins
    exit_code = None
    for code in calc_exit_code:
        if code in (exit_code_2, exit_code_1):
            exit_code = code
            break

    # Append the final value of trajectory_data into out_dict
    last_value_keys = [
        "free_energy", "total_energy", "zero_K_energy", "spin_density",
        "abs_spin_density", "enthalpy"
    ]
    for key in last_value_keys:
        add_last_if_exists(trajectory_data, key, out_dict)

    # Add warnings from this level
    out_dict["warnings"].extend(warnings)

    # Add error messages
    out_dict["error_messages"] = list(err_contents)

    ######## --- PROCESSING BANDS DATA -- ########
    has_castep_bin = output_folder.has_file(seedname + '.castep_bin')
    if has_bands or has_castep_bin:
        # Only use castep_bin if we are interested in SCF kpoints
        if has_castep_bin and (self.castep_task.lower()
                               not in NON_SCF_BAND_TASKS):
            self.logger.info("Using castep_bin file for the bands data.")
            bands_node = bands_from_castepbin(seedname, output_folder)
            if not self._has_empty_bands(bands_node):
                # Set if no other errors
                out_dict["warnings"].append(
                    "At least one kpoint has no empty bands, energy/forces returned are not reliable."
                )
                if exit_code == 'CALC_FINISHED':
                    exit_code = "ERROR_NO_EMPTY_BANDS"
        else:
            bands_node = bands_to_bandsdata(**bands_data)
        self.out(out_ln['bands'], bands_node)

    ######## --- PROCESSING STRUCTURE DATA --- ########
    no_optimise = False
    try:
        cell = structure_data["cell"]
        positions = structure_data["positions"]
        symbols = structure_data["symbols"]

    except KeyError:
        # Handle special case where CASTEP founds nothing to optimise,
        # hence we attached the input geometry as the output
        no_optimise = any("there is nothing to optimise" in warning
                          for warning in out_dict["warnings"])
        if no_optimise is True:
            self.out(out_ln['structure'],
                     deepcopy(self.node.inputs.structure))
    else:
        structure_node = structure_from_input(cell=cell,
                                              positions=positions,
                                              symbols=symbols)
        # Use the output label as the input label
        input_structure = self.node.inputs.structure
        structure_node = desort_structure(structure_node, input_structure)
        structure_node.label = input_structure.label
        self.out(out_ln['structure'], structure_node)

    ######### --- PROCESSING TRAJECTORY DATA --- ########
    def _store_arrays(target, desort_indices):
        """Store every non-empty entry of `trajectory_data` as an array of `target`."""
        for name, value in trajectory_data.items():
            # Skip saving empty arrays
            if len(value) == 0:
                continue

            array = np.asarray(value)
            # For forces/velocities we also need to resort the array
            if ("force" in name) or ("velocities" in name):
                array = array[:, desort_indices]
            # Store the resorted array: storing `value` again would silently
            # discard the reordering done just above
            target.set_array(name, array)

    # If there is anything to save
    # It should...
    if trajectory_data:

        # Resorting indices - for recovering the original ordering of the
        # species in the input structure
        input_structure = self.node.inputs.structure
        idesort = get_desort_args(input_structure)
        # If we have .geom file, save as in a trajectory data
        if has_md_geom:
            try:
                positions = np.asarray(
                    trajectory_data["positions"])[:, idesort]
                cells = trajectory_data["cells"]
                # Assume symbols do not change - symbols are the same for all frames
                symbols = np.asarray(trajectory_data["symbols"])[idesort]
                stepids = np.arange(len(positions))

            except KeyError:
                # NOTE(review): the key used here is "parser_warning"
                # (singular) -- confirm that the raw parser provides it.
                out_dict["parser_warning"].append(
                    "Cannot "
                    "extract data from .geom file.")

            else:
                traj = TrajectoryData()
                traj.set_trajectory(stepids=np.asarray(stepids),
                                    cells=np.asarray(cells),
                                    symbols=np.asarray(symbols),
                                    positions=np.asarray(positions))
                # Save the rest
                # NOTE(review): this also stores again "positions", "cells"
                # and "symbols" as read from the file (not resorted) --
                # confirm whether these keys should be skipped here.
                _store_arrays(traj, idesort)
                self.out(out_ln['trajectory'], traj)

        # Or may there is nothing to optimise? still save a Trajectory data
        elif no_optimise is True:
            traj = TrajectoryData()
            traj.set_trajectory(stepids=np.asarray([1]),
                                cells=np.asarray([input_structure.cell]),
                                symbols=np.asarray([
                                    site.kind_name
                                    for site in input_structure.sites
                                ]),
                                positions=np.asarray([[
                                    site.position
                                    for site in input_structure.sites
                                ]]))
            # Save the rest
            _store_arrays(traj, idesort)
            self.out(out_ln['trajectory'], traj)
        # Otherwise, save data into a ArrayData node
        else:
            out_array = ArrayData()
            _store_arrays(out_array, idesort)
            self.out(out_ln['array'], out_array)

    ######## ---- PROCESSING OUTPUT DATA --- ########
    output_params = Dict(dict=out_dict)
    self.out(out_ln['results'], output_params)

    # Return the exit code
    return self.exit_codes.__getattr__(exit_code)
def _parse_trajectory(self):
    """Parse the Abinit HIST NetCDF file into trajectory and structure outputs.

    The file ``<prefix>o_HIST.nc`` is looked up by name among the retrieved
    object names and then opened from the directory returned by
    ``self.node.get_remote_workdir()``.

    On success two outputs are registered through ``self.out``:
    ``output_trajectory`` (a ``TrajectoryData`` holding step ids, cells,
    symbols and positions plus the extra arrays ``energy``, ``energy_kin``,
    ``forces``, ``stress`` and ``total_force``) and ``output_structure``
    (a ``StructureData`` built from the last structure of the history).

    :return: ``self.exit_codes.ERROR_MISSING_OUTPUT_FILES`` when the HIST
        file name is not among the retrieved object names, otherwise
        ``None``.
    """

    def _voigt_to_tensor(voigt):
        """Expand the first six entries of ``voigt`` into a symmetric 3x3 array."""
        # Component k of the Voigt vector lands on (row, col) and its mirror.
        targets = ((0, 0), (1, 1), (2, 2), (1, 2), (0, 2), (0, 1))
        tensor = np.zeros((3, 3))
        for component, (row, col) in enumerate(targets):
            tensor[row, col] = voigt[component]
            tensor[col, row] = tensor[row, col]
        return tensor

    # Absolute path of the folder in which the calculation output is stored
    workdir = self.node.get_remote_workdir()
    # HIST Abinit NetCDF file - default name is aiidao_HIST.nc
    prefix = self.node.get_attribute("prefix")
    hist_name = f'{prefix}o_HIST.nc'

    # Guard clause: nothing to parse when the HIST file was not retrieved.
    if hist_name not in self.retrieved.list_object_names():
        return self.exit_codes.ERROR_MISSING_OUTPUT_FILES

    hist_path = workdir + '/' + hist_name

    # First pass: structures along the history.
    with HistFile(hist_path) as hist_file:
        structures = hist_file.structures

    output_structure = StructureData(pymatgen=structures[-1])

    # Second pass: raw arrays, still in Abinit's native units.
    with nc.Dataset(hist_path, 'r') as data_set:  # pylint: disable=no-member
        n_steps = data_set.dimensions['time'].size
        energy_ha = data_set.variables['etotal'][:].data  # Ha
        energy_kin_ha = data_set.variables['ekin'][:].data  # Ha
        forces_cart_ha_bohr = data_set.variables['fcart'][:, :, :].data  # Ha/bohr
        positions_cart_bohr = data_set.variables['xcart'][:, :, :].data  # bohr
        stress_voigt = data_set.variables['strten'][:, :].data  # Ha/bohr^3

    # Quantities required by TrajectoryData.set_trajectory
    stepids = np.arange(n_steps)
    first_species = structures[0].species
    symbols = np.array([specie.symbol for specie in first_species], dtype='<U2')
    lattice_matrices = [structure.lattice.matrix for structure in structures]
    cells = np.array(lattice_matrices).reshape((n_steps, 3, 3))
    positions = positions_cart_bohr * units.bohr_to_ang

    # Unit conversions for the additional arrays
    energy = energy_ha * units.Ha_to_eV
    energy_kin = energy_kin_ha * units.Ha_to_eV
    forces = forces_cart_ha_bohr * units.Ha_to_eV / units.bohr_to_ang
    stress_tensors = np.array([_voigt_to_tensor(sv) for sv in stress_voigt])
    stress = stress_tensors * units.Ha_to_eV / units.bohr_to_ang**3
    # One scalar per step: the sum over every force component of that step.
    summed_forces = np.array([np.sum(f) for f in forces_cart_ha_bohr])
    total_force = summed_forces * units.Ha_to_eV / units.bohr_to_ang

    output_trajectory = TrajectoryData()
    output_trajectory.set_trajectory(
        stepids=stepids,
        cells=cells,
        symbols=symbols,
        positions=positions,
    )

    extra_arrays = (
        ('energy', energy),  # eV
        ('energy_kin', energy_kin),  # eV
        ('forces', forces),  # eV/angstrom
        ('stress', stress),  # eV/angstrom^3
        ('total_force', total_force),  # eV/angstrom
    )
    for label, values in extra_arrays:
        output_trajectory.set_array(label, values)

    self.out('output_trajectory', output_trajectory)
    self.out('output_structure', output_structure)