def parse(self):
    """Parse the micro files of a PreMod run into a single array node.

    Every object in the ``PreModRun`` directory of the retrieved folder whose
    name contains ``PreModRun_Micro`` is opened and passed to
    ``self._parse_micro``.  The result for the n-th file (in the order given
    by ``list_object_names``) is stored under the array name ``step_<n>``.

    :return: ``{'micro': <array node>}`` on success, otherwise one of the
        exit codes ``ERROR_NO_RETRIEVED_FOLDER``,
        ``ERROR_READING_MICRO_FILE`` or ``ERROR_INVALID_MICRO_OUTPUT``.
    """
    # Check if retrieved folder is present.
    try:
        output_folder = self.premod_calc.retrieved
    except exceptions.NotExistent:
        return self.exit_codes.ERROR_NO_RETRIEVED_FOLDER

    # Collect the micro files.
    micro_files = [
        path.join('PreModRun', filename)
        for filename in output_folder.list_object_names('PreModRun')
        if 'PreModRun_Micro' in filename
    ]

    micro_array = DataFactory('array')()
    for index, filename in enumerate(micro_files):
        try:
            with output_folder.open(filename, 'r') as handle:
                result = self._parse_micro(handle)
        except OSError:
            # All exit codes are looked up on ``self.exit_codes``, the same
            # attribute used for the missing-folder case above.
            return self.exit_codes.ERROR_READING_MICRO_FILE
        if result is None:
            return self.exit_codes.ERROR_INVALID_MICRO_OUTPUT
        micro_array.set_array('step_' + str(index), result)

    return {'micro': micro_array}
def store_quantities(quantities_container):
    """Copy the list held by ``quantities_container`` into a new array node.

    The list returned by ``quantities_container.get_list()`` is converted to
    a NumPy array and stored under the array name ``quantities``.

    :param quantities_container: object exposing ``get_list()``.
    :return: the newly created array node.
    """
    values = np.array(quantities_container.get_list())
    node = DataFactory('array')()
    node.set_array('quantities', values)
    return node
def get_random_displacements(structure, number_of_snapshots, temperature, **data):
    """Generate random displacements from force constants fitted to earlier runs.

    The keyword arguments carry the results of previous iterations as pairs
    ``forces_<i>`` / ``ph_info_<i>`` (``i`` starting at 1), plus the optional
    entries ``random_seed`` and ``include_ratio``.  Displacements and forces
    of all iterations are concatenated, optionally reduced to the
    lowest-energy supercells, and used to fit force constants with the
    ``alm`` calculator before new displacements are generated.

    :param structure: converted with ``phonopy_atoms_from_structure`` and
        used as the unit cell.
    :param number_of_snapshots: node whose ``value`` is passed as
        ``number_of_snapshots`` to ``generate_displacements``.
    :param temperature: node whose ``value`` is passed as ``temperature`` to
        ``generate_displacements``.
    :param data: ``forces_<i>`` array nodes (array ``force_sets`` and
        optionally ``energies``), ``ph_info_<i>`` mappings holding
        ``displacement_dataset`` (``ph_info_1`` also ``supercell_matrix``),
        and optionally ``random_seed`` / ``include_ratio`` nodes.
    :return: dict with ``displacement_dataset`` and, only when an energy
        based selection was applied, ``supercell_energies``.
    """
    # Count the force sets explicitly.  ``len(data) // 2`` is one too large
    # when both ``random_seed`` and ``include_ratio`` are supplied, which
    # made the loop look up a non-existent ``forces_<n+1>`` entry.
    num_sets = sum(1 for key in data if key.startswith('forces_'))

    displacements = []
    forces = []
    energies = []
    for number in range(1, num_sets + 1):
        force_node = data['forces_%d' % number]
        forces.append(force_node.get_array('force_sets'))
        if 'energies' in force_node.get_arraynames():
            energies.append(force_node.get_array('energies'))
        dataset = data['ph_info_%d' % number]['displacement_dataset']
        disps, _ = get_displacements_and_forces(dataset)
        displacements.append(disps)

    all_displacements = np.concatenate(displacements, axis=0)
    all_forces = np.concatenate(forces, axis=0)

    # Optionally keep only the fraction ``include_ratio`` of supercells with
    # the lowest energies.  This is skipped unless every force set came with
    # energies and there is one energy per supercell.
    idx = None
    if len(energies) == len(forces) and 'include_ratio' in data:
        all_energies = np.concatenate(energies)
        if len(all_energies) == len(all_forces):
            ratio = data['include_ratio'].value
            if 0 < ratio < 1:
                num_include = min(int(np.ceil(ratio * len(all_energies))),
                                  len(all_energies))
                idx = np.argsort(all_energies)[:num_include]
                all_displacements = all_displacements[idx]
                all_forces = all_forces[idx]

    smat = data['ph_info_1']['supercell_matrix']
    ph = phonopy.load(unitcell=phonopy_atoms_from_structure(structure),
                      supercell_matrix=smat,
                      primitive_matrix='auto')
    ph.dataset = {'displacements': all_displacements, 'forces': all_forces}
    ph.produce_force_constants(fc_calculator='alm')
    _modify_force_constants(ph)

    random_seed = data['random_seed'].value if 'random_seed' in data else None
    ph.generate_displacements(number_of_snapshots=number_of_snapshots.value,
                              random_seed=random_seed,
                              temperature=temperature.value)

    ret_dict = {'displacement_dataset': Dict(dict=ph.dataset)}
    if idx is not None:
        array = DataFactory('array')()
        array.set_array('supercell_energies', all_energies)
        array.set_array('included_supercell_indices', idx)
        ret_dict['supercell_energies'] = array
    return ret_dict
def get_force_constants(structure, phonon_settings, force_sets):
    """Compute force constants and return them in a new array node.

    A phonopy instance is obtained from ``get_phonopy_instance`` (called with
    an empty parameter dict), fed the displacement dataset from
    ``phonon_settings`` and the ``force_sets`` array, and asked to produce
    force constants.

    :param structure: forwarded to ``get_phonopy_instance``.
    :param phonon_settings: mapping providing ``displacement_dataset``; also
        forwarded to ``get_phonopy_instance``.
    :param force_sets: array node holding the array ``force_sets``.
    :return: array node labelled ``force_constants`` with the arrays
        ``force_constants`` and ``p2s_map``.
    """
    phonon = get_phonopy_instance(structure, phonon_settings, {})
    phonon.dataset = phonon_settings['displacement_dataset']
    phonon.forces = force_sets.get_array('force_sets')
    phonon.produce_force_constants()

    fc_node = DataFactory('array')()
    stored_arrays = (
        ('force_constants', phonon.force_constants),
        ('p2s_map', phonon.primitive.p2s_map),
    )
    for array_name, values in stored_arrays:
        fc_node.set_array(array_name, values)
    fc_node.label = 'force_constants'
    return fc_node
def store_total_energies(total_energies):
    """Store the total energies in a new array node, sorted by first column.

    The rows returned by ``total_energies.get_list()`` are ordered by
    ascending value of their first column (the volume, according to the
    original author's comment) and stored under the array name ``eos``.

    :param total_energies: object exposing ``get_list()``; the list must
        convert to a two-dimensional NumPy array.
    :return: the newly created array node.
    """
    energies = np.array(total_energies.get_list())
    # The entries were collected in arbitrary order, so order the rows first.
    row_order = np.argsort(energies[:, 0])
    eos_node = DataFactory('array')()
    eos_node.set_array('eos', energies[row_order])
    return eos_node
def parse_dos(self, data):
    """Build an array node holding the densities of states.

    The stored ``dos`` array is the vertical stack of ``data["e"]``,
    ``data["dos_up"]`` and, when ``data['dos_down']`` is not ``None``, the
    negated ``data['dos_down']``.

    :param data: mapping with the keys ``e``, ``dos_up`` and ``dos_down``.
    :return: a one-element tuple containing the array node.
    :raises ValueError: if ``data`` is empty or otherwise falsy.
    """
    if not data:
        raise ValueError("Sorry, didn't find dos info in fort.25")
    from aiida.plugins import DataFactory

    dos_node = DataFactory("array")()
    rows = [data["e"], data["dos_up"]]
    spin_down = data['dos_down']
    if spin_down is not None:
        # The second spin channel is stored with a flipped sign.
        rows.append(-1 * spin_down)
    dos_node.set_array("dos", np.vstack(rows))
    # NOTE(review): the original ends in ``return array_data,`` and so returns
    # a 1-tuple rather than the bare node.  Kept unchanged because callers
    # may unpack it -- confirm whether the trailing comma was intended.
    return (dos_node,)
def SavePositions(positions):
    """Copy the ``structure_positions`` array into a new array node.

    Parameters
    ----------
    positions
        Node exposing ``get_array``; its ``structure_positions`` array is
        read and converted with ``numpy.array``.

    Returns
    -------
    New array node holding the copy under the name ``structure_positions``.
    """
    array_name = 'structure_positions'
    coordinates = np.array(positions.get_array(array_name))
    node = DataFactory('array')()
    node.set_array(array_name, coordinates)
    return node
def _parse_time(self, wb):
    """Parse the content of a workbook whose rows are indexed by time.

    The cell ranges to read are taken from ``self.parameters``:
    ``time_range``, ``data_range``, ``label_range`` and ``comment_range``.
    Entries of ``manual_label`` (position -> label) are inserted into the
    labels read from the sheet.

    :param wb: workbook whose ``active`` sheet is read.
    :return: ``{'data': <array node>, 'metadata': <dict node>}``.  The array
        ``content`` has the elapsed seconds since the first time stamp as
        its first column, followed by the data columns.  The metadata holds
        ``start_time``, ``comments`` and ``labels``.
    :raises ValueError: if the number of labels differs from the number of
        columns in the first row of ``content``.
    """
    # Fetch active sheet
    ws = wb.active
    params = self.parameters

    # Times are stored relative to the first entry.
    times_raw = [row[0].value for row in ws[params['time_range']]]
    reference_time = times_raw[0]
    times = np.array([(stamp - reference_time).total_seconds()
                      for stamp in times_raw])

    # Combine the time steps and the data
    values = np.array([[cell.value for cell in row]
                       for row in ws[params['data_range']]])
    time_data = np.column_stack((times, values))

    # Load labels and override with manual labels
    labels = [cell.value for cell in ws[params['label_range']][0]]
    for position, label in params['manual_label'].items():
        labels.insert(position, label)

    # Check that we have labels for all data (only check first row/column)
    if time_data[0].shape[0] != len(labels):
        raise ValueError(
            'Some of the data is missing labels, please correct the supplied parameters.'
        )

    # Extract comments
    comments = [[str(cell.value) for cell in row]
                for row in ws[params['comment_range']]]

    # Compose data and metadata nodes
    data = DataFactory('array')()
    data.set_array('content', time_data)
    metadata = DataFactory('dict')(dict={
        # ``utcnow()`` is a classmethod that returns the current wall-clock
        # time, so calling it on ``reference_time`` discarded the reference
        # and made the metadata non-reproducible.  Store the reference
        # itself, exactly as it was read from the sheet.
        'start_time': reference_time,
        'comments': comments,
        'labels': labels
    })
    return {'data': data, 'metadata': metadata}
def get_force_sets(**forces_dict):
    """Collect forces (and energies, if present) into one array node.

    For every number ``n`` from 1 to ``len(forces_dict)`` the entries
    ``forces_<nnn>`` and ``misc_<nnn>`` (zero padded to three digits) are
    looked up.  Found force nodes contribute their ``final`` array; found
    misc entries contribute ``['total_energies']['energy_no_entropy']``.

    :param forces_dict: keyword arguments as described above.
    :return: array node labelled ``force_sets`` with the array ``force_sets``
        and, when any energies were found, the array ``energies``.
    """
    forces = []
    energies = []
    for number in range(1, len(forces_dict) + 1):
        forces_key = "forces_%03d" % number
        if forces_key in forces_dict:
            forces.append(forces_dict[forces_key].get_array('final'))

        misc_key = "misc_%03d" % number
        if misc_key in forces_dict:
            total_energies = forces_dict[misc_key]['total_energies']
            energies.append(total_energies['energy_no_entropy'])

    # Every key mentioning 'forces' must have been picked up by the loop.
    num_forces_keys = sum('forces' in key for key in forces_dict)
    assert len(forces) == num_forces_keys

    node = DataFactory('array')()
    node.set_array('force_sets', np.array(forces))
    if energies:
        node.set_array('energies', np.array(energies))
    node.label = 'force_sets'
    return node
def finalize(self):
    """Finalize the workchain by attaching its two outputs.

    The contact field dictionary and the positions kept in the context are
    wrapped in data nodes and passed through ``SaveContactField`` and
    ``SavePositions`` before being set as the outputs ``contact_field_dict``
    and ``structure_positions``.
    """
    # For data provenance, data nodes cannot be returned directly; they have
    # to pass through a calcfunction, workfunction or workchain first.
    contact_field_node = DataFactory('dict')(dict=self.ctx.contact_field_dict)

    positions_node = DataFactory('array')()
    positions_node.set_array('structure_positions',
                             np.array(self.ctx.positions_array))

    # And then store the outputs on the workchain
    self.out('contact_field_dict', SaveContactField(contact_field_node))
    self.out('structure_positions', SavePositions(positions_node))
def get_data_from_node_id(node_id):
    """Load a node and extract its structure plus NAC parameters or forces.

    :param node_id: node whose ``value`` is passed to ``load_node``.
    :return: when the loaded node has both ``born_charges`` and
        ``dielectrics`` outputs, a dict with ``born_charges``,
        ``dielectrics`` and ``structure``; otherwise, when it has a
        ``forces`` output, a dict with ``forces`` and ``structure``.
    :raises RuntimeError: if the node has no ``structure`` input, or has
        neither of the output sets described above.
    """
    node = load_node(node_id.value)

    if 'structure' not in node.inputs:
        raise RuntimeError("Crystal structure could not be found.")
    # Round-trip the input structure through the phonopy representation.
    structure = phonopy_atoms_to_structure(
        phonopy_atoms_from_structure(node.inputs.structure))

    outputs = node.outputs
    if 'born_charges' in outputs and 'dielectrics' in outputs:
        born = DataFactory('array')()
        born.set_array('born_charges',
                       outputs.born_charges.get_array('born_charges'))
        born.label = 'born_charges'

        epsilon = DataFactory('array')()
        epsilon.set_array('epsilon', outputs.dielectrics.get_array('epsilon'))
        epsilon.label = 'epsilon'

        return {
            'born_charges': born,
            'dielectrics': epsilon,
            'structure': structure
        }

    if 'forces' in outputs:
        forces = DataFactory('array')()
        forces.set_array('final', outputs.forces.get_array('final'))
        forces.label = 'forces'
        return {'forces': forces, 'structure': structure}

    raise RuntimeError("Forces or NAC params were not found.")
def _parse(self, file_handle):
    """Parse the content of the data file as a NumPy array.

    Lines are stripped and split into comment lines (those starting with the
    ``comment_string`` parameter, default ``'#'``) and all other lines.  The
    other lines, except the first, are converted with ``string_to_float``
    using the ``separator`` parameter (default ``' '``).

    :param file_handle: open handle supporting ``readlines()``.
    :return: ``{'data': <array node>, 'metadata': <dict node>}`` where the
        array is stored as ``content`` and the metadata holds ``comments``
        and ``labels``.
    """
    raw_lines = file_handle.readlines()

    params = self.parameters
    separator = params.get('separator', ' ')
    comment_string = params.get('comment_string', '#')

    rows = []
    comments = []
    for raw_line in raw_lines:
        stripped = raw_line.strip()
        if stripped.startswith(comment_string):
            comments.append(stripped)
        else:
            rows.append(stripped)

    labels = rows[0].split(separator) if params.get('labels') else None

    # Convert to array
    # NOTE(review): the first non-comment line is skipped even when no labels
    # are requested -- confirm that such files always carry a header line.
    array = string_to_float(rows[1:], separator)

    # Compose data and metadata nodes
    data = DataFactory('array')()
    data.set_array('content', array)
    metadata = DataFactory('dict')(dict={
        'comments': comments,
        'labels': labels
    })
    return {'data': data, 'metadata': metadata}
def calculate_concentration_from_area(parameters_data, calibration_data, data):
    """Calculate the concentration from the area and the calibration.

    Every array in ``data`` except ``time`` and ``id`` is treated as one
    channel.  For channel number ``i`` the calibration factors are the first
    value of each mapping in ``calibration_data[i]``.  The number of entries
    in ``data_layout[i]`` whose label contains ``concentration`` is used as
    the first column of the area slice that is multiplied by the factors.

    :param parameters_data: node whose ``get_dict()`` provides
        ``data_layout``.
    :param calibration_data: indexable by channel number; each item is an
        iterable of mappings.
    :param data: array node holding one two-dimensional array per channel.
    :return: new array node with one concentration array per channel, stored
        under the channel's name.
    """
    import numpy as np

    parameters = parameters_data.get_dict()
    channels = [
        name for name in data.get_arraynames() if name not in ('time', 'id')
    ]

    concentration_data = DataFactory('array')()
    for channel_index, channel in enumerate(channels):
        calibration = np.array([
            list(entry.values())[0]
            for entry in calibration_data[channel_index]
        ])

        # NOTE(review): presumably the 'concentration' columns come first in
        # the area array, so their count is where the slice starts -- confirm.
        layout = parameters['data_layout'][channel_index]
        first_area_column = sum(
            1 for field in layout
            if 'concentration' in list(field.keys())[0])

        # concentration = area x calibration
        area = data.get_array(channel)
        concentration_data.set_array(
            channel, area[:, first_area_column:] * calibration)
    return concentration_data
def get_nac_params(born_charges, epsilon, nac_structure, **params):
    """Obtain Born effective charges and dielectric constants in primitive cell

    When Born effective charges and dielectric constants are calculated
    within the phonopy workchain, they refer to the primitive cell.  With
    immigrant data the cell may instead be a unit cell, and the values must
    then be converted.  The conversion needs the structure in which the
    values were calculated and the target primitive cell.

    Two optional keyword parameters are recognised:
        primitive : StructureData
        symmetry_tolerance : Float

    :param born_charges: array node holding the array ``born_charges``.
    :param epsilon: array node holding the array ``epsilon``.
    :param nac_structure: structure in which the values were calculated.
    :return: array node labelled ``born_charges & epsilon`` holding the
        arrays returned by ``symmetrize_borns_and_epsilon``.
    """
    from phonopy.structure.symmetry import symmetrize_borns_and_epsilon

    borns = born_charges.get_array('born_charges')
    eps = epsilon.get_array('epsilon')
    nac_cell = phonopy_atoms_from_structure(nac_structure)

    # Only forward the options that were actually supplied.
    options = {}
    if 'symmetry_tolerance' in params:
        options['symprec'] = params['symmetry_tolerance'].value
    if 'primitive' in params:
        options['primitive'] = phonopy_atoms_from_structure(
            params['primitive'])

    sym_borns, sym_epsilon = symmetrize_borns_and_epsilon(
        borns, eps, nac_cell, **options)

    nac_params = DataFactory('array')()
    nac_params.set_array('born_charges', sym_borns)
    nac_params.set_array('epsilon', sym_epsilon)
    nac_params.label = 'born_charges & epsilon'
    return nac_params
def _parse(self, file_handle):  # pylint: disable=too-many-locals
    """Parse the content of GC file as a NumPy array.

    ``self.parameters`` is read for ``separator`` (default ``' '``),
    ``comment_range`` (optional; only a single line number is supported),
    ``data_layout`` (one list of single-key mappings per channel) and
    ``data_start_line``.  Each data line is expected to hold the columns of
    all channels one after another, in the order given by ``data_layout``.

    :param file_handle: open handle supporting ``readlines()``.
    :return: ``{'data': <array node>, 'metadata': <dict node>}``.  The array
        node holds ``channel_<n>`` (numbered from 1) with the non-time
        columns converted to float, and ``time`` with the seconds elapsed
        since the first line's time stamp of channel one.  The metadata
        holds ``start_time``, ``comments`` and ``labels``.
    :raises ValueError: if a channel does not have exactly one time entry.
    :raises NotImplementedError: if ``comment_range`` contains ``-`` or
        ``,``.
    """
    # Set the separator.  The original re-read ``parameters['separator']``
    # unconditionally further down, which defeated this default.
    try:
        separator = self.parameters['separator']
    except KeyError:
        separator = ' '

    # Fetch comment range
    try:
        comment_range = self.parameters['comment_range']
    except KeyError:
        comment_range = None

    # Read content
    content = file_handle.readlines()

    # Fetch data layout
    data_layout = self.parameters['data_layout']

    # Fetch comments if specified
    comments = None
    if comment_range:
        if '-' in comment_range or ',' in comment_range:
            raise NotImplementedError
        # Only comments on one line
        comments = content[int(comment_range)]

    # Build the labels and locate the time column of every channel
    labels = []
    time_columns = []
    for layout in data_layout:
        labels.append([list(item.keys())[0] for item in layout])
        time_entries = [
            index for index, item in enumerate(layout) if 'time' in item
        ]
        if len(time_entries) > 1:
            raise ValueError(
                'More than one time entry per channel. Please correct the configuration.'
            )
        if not time_entries:
            raise ValueError(
                'No time entry found for a channel. Please correct the configuration.'
            )
        time_columns.append(time_entries[0])

    # Start extracting the actual data
    date_time = []
    data = [[] for _ in data_layout]
    for line in content[self.parameters['data_start_line']:]:
        # Replace all empty strings with a zero (later converted to float)
        tokens = [
            '0.0' if item == '' else item for item in line.split(separator)
        ]
        offset = 0
        for channel, layout in enumerate(data_layout):
            # Take this channel's columns straight from the raw line.  The
            # original popped the time column and then sliced by a width
            # that still counted it, so every channel after the first was
            # read one column off.
            width = len(layout)
            chunk = tokens[offset:offset + width]
            time_column = time_columns[channel]
            if channel == 0:
                # Time entries are assumed the same between channels
                date_time.append(
                    parser.parse(chunk[time_column].strip(), fuzzy=True))
            data[channel].append([
                float(item) for index, item in enumerate(chunk)
                if index != time_column
            ])
            offset += width

    # Calculate time difference for each step and store that instead of
    # absolute times
    reference_time = date_time[0]
    elapsed = [(stamp - reference_time).total_seconds()
               for stamp in date_time]

    # Compose data, time and metadata nodes
    array = DataFactory('array')()
    for channel, rows in enumerate(data):
        array.set_array('channel_' + str(channel + 1), np.array(rows))
    array.set_array('time', np.array(elapsed))
    meta = DataFactory('dict')(dict={
        # ``utcnow()`` is a classmethod returning the current wall-clock
        # time; store the parsed reference time instead.
        'start_time': reference_time,
        'comments': comments,
        'labels': labels
    })
    return {'data': array, 'metadata': meta}
def _parse(self, file_handle):  # pylint: disable=too-many-locals
    """Parse the content of GC file as a NumPy array.

    ``self.parameters`` is read for ``separator`` (default ``' '``),
    ``comment_range`` (optional; only a single line number is supported),
    ``data_layout`` (one list of single-key mappings per channel) and
    ``data_start_line``.  A layout entry is a time column if it has the key
    ``time``, a sample id column if it has the key ``id``, dropped if it has
    the key ``ignore``, and a data column otherwise.  Each data line is
    expected to hold the columns of all channels one after another, in the
    order given by ``data_layout``.

    :param file_handle: open handle supporting ``readlines()``.
    :return: ``{'data': <array node>, 'metadata': <dict node>}``.  The array
        node holds ``channel_<n>`` (numbered from 1) with the data columns
        converted to float, ``time`` with the seconds elapsed since the
        first line's time stamp, and ``id`` with one sample id per line
        (0 where the id is not an integer); time and id are taken from
        channel one.  The metadata holds ``start_time``, ``comments`` and
        ``labels``.
    :raises ValueError: if a channel does not have exactly one time and one
        id entry, or if a data column cannot be converted to float.
    :raises NotImplementedError: if ``comment_range`` contains ``-`` or
        ``,``.
    """
    # Set the separator.  The original re-read ``parameters['separator']``
    # unconditionally further down, which defeated this default.
    try:
        separator = self.parameters['separator']
    except KeyError:
        separator = ' '

    # Fetch comment range
    try:
        comment_range = self.parameters['comment_range']
    except KeyError:
        comment_range = None

    # Read content
    content = file_handle.readlines()

    # Fetch data layout
    data_layout = self.parameters['data_layout']

    # Fetch comments if specified
    comments = None
    if comment_range:
        if '-' in comment_range or ',' in comment_range:
            raise NotImplementedError
        # Only comments on one line
        comments = content[int(comment_range)]

    # Work out the role of every column, one entry per channel.  The
    # original appended one list entry per matching column, so the lists
    # were indexed by channel but filled per column and went out of step as
    # soon as a channel had several (or no) ignore columns.
    labels = []
    time_columns = []
    id_columns = []
    value_columns = []
    for layout in data_layout:
        time_entries = []
        id_entries = []
        values = set()
        for index, item in enumerate(layout):
            if 'time' in item:
                time_entries.append(index)
            elif 'id' in item:
                id_entries.append(index)
            elif 'ignore' not in item:
                values.add(index)

        # Check that only one time index is given
        if len(time_entries) > 1:
            raise ValueError(
                'More than one time entry per channel. Please correct the configuration.'
            )
        # Check that only one id index is given
        if len(id_entries) > 1:
            raise ValueError(
                'More than one id entry per channel. Please correct the configuration.'
            )
        if not time_entries or not id_entries:
            raise ValueError(
                'Each channel needs one time and one id entry. Please correct the configuration.'
            )
        time_columns.append(time_entries[0])
        id_columns.append(id_entries[0])
        value_columns.append(values)

        # Build labels
        keys = [list(item.keys())[0] for item in layout]
        labels.append(
            [key for key in keys if key not in ('time', 'id', 'ignore')])

    # Start extracting the actual data
    date_time = []
    sample_id = []
    data = [[] for _ in data_layout]
    for line in content[self.parameters['data_start_line']:]:
        tokens = line.split(separator)
        offset = 0
        for channel, layout in enumerate(data_layout):
            # Pick columns by role from this channel's raw slice instead of
            # popping them one by one and re-adjusting the other indices.
            width = len(layout)
            chunk = tokens[offset:offset + width]
            if channel == 0:
                # Time and id are assumed the same between channels
                date_time.append(
                    parser.parse(chunk[time_columns[channel]].strip(),
                                 fuzzy=True))
                try:
                    s_id = int(chunk[id_columns[channel]].strip())
                except ValueError:
                    s_id = 0
                sample_id.append(s_id)

            # Convert from string to target data for each channel
            try:
                data[channel].append([
                    float(item) for index, item in enumerate(chunk)
                    if index in value_columns[channel]
                ])
            except ValueError as e:
                raise ValueError(
                    'A field with an empty string might have been detected. Are you sure you have '
                    'specified correct ignore fields in the parameters?'
                ) from e
            offset += width

    # Calculate time difference for each step and store that instead of
    # absolute times
    reference_time = date_time[0]
    elapsed = [(stamp - reference_time).total_seconds()
               for stamp in date_time]

    # Compose data, time and metadata nodes
    array_data = DataFactory('array')()
    for channel, rows in enumerate(data):
        array_data.set_array('channel_' + str(channel + 1), np.array(rows))
    array_data.set_array('time', np.array(elapsed))
    # Store every sample id.  The original indexed the per-line list with a
    # leftover channel counter and so kept a single value.
    array_data.set_array('id', np.array(sample_id))
    meta = DataFactory('dict')(
        dict={
            # ``utcnow()`` is a classmethod returning the current wall-clock
            # time; store the parsed reference time instead.  It is kept as
            # a string, and no time zone information is attached.
            'start_time': str(reference_time),
            'comments': comments,
            'labels': labels
        })
    return {'data': array_data, 'metadata': meta}