Example #1
    def parse(self):
        """Parse micro files of premod."""

        # Check if retrieved folder is present.
        try:
            output_folder = self.premod_calc.retrieved
        except exceptions.NotExistent:
            return self.exit_codes.ERROR_NO_RETRIEVED_FOLDER
        # Try to load the micro files.
        files_in_folder = output_folder.list_object_names('PreModRun')
        micro_files = [
            path.join('PreModRun', filename) for filename in files_in_folder
            if 'PreModRun_Micro' in filename
        ]
        micro_array = DataFactory('array')()
        for index, filename in enumerate(micro_files):
            try:
                with output_folder.open(filename, 'r') as handle:
                    result = self._parse_micro(handle)
            except (OSError, IOError):
                return self.exit_codes.ERROR_READING_MICRO_FILE
            if result is None:
                return self.exit_codes.ERROR_INVALID_MICRO_OUTPUT
            micro_array.set_array('step_' + str(index), result)

        return {'micro': micro_array}
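For reference, the step arrays stored above can be read back from the returned ArrayData node; a minimal sketch (assuming the parser finished successfully and `results` is its return value):

    micro = results['micro']
    for name in micro.get_arraynames():
        print(name, micro.get_array(name).shape)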
Example #2
def store_quantities(quantities_container):
    """Stores the quantities to keep data provenance."""
    quantities_container_list = quantities_container.get_list()
    quantities_container_array = DataFactory('array')()
    quantities_container_array.set_array('quantities',
                                         np.array(quantities_container_list))
    return quantities_container_array
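A minimal usage sketch for `store_quantities`, assuming a loaded AiiDA profile and that the function is decorated as a calcfunction (the input values are illustrative):

    import numpy as np
    from aiida.orm import List

    quantities = List(list=[[1.0, 2.0], [3.0, 4.0]])
    array_node = store_quantities(quantities)
    assert np.allclose(array_node.get_array('quantities'),
                       np.array([[1.0, 2.0], [3.0, 4.0]]))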
Example #3
def get_random_displacements(structure, number_of_snapshots, temperature,
                             **data):
    """Generate displacements at the given temperature from collected force data."""
    displacements = []
    forces = []
    energies = []

    # Count the force sets from the keyword names, since **data may also
    # contain optional keys such as include_ratio and random_seed.
    num_force_sets = sum(key.startswith('forces_') for key in data)
    for i in range(num_force_sets):
        forces.append(data['forces_%d' % (i + 1)].get_array('force_sets'))
        if 'energies' in data['forces_%d' % (i + 1)].get_arraynames():
            energies.append(data['forces_%d' % (i + 1)].get_array('energies'))
        phonon_setting_info = data['ph_info_%d' % (i + 1)]
        dataset = phonon_setting_info['displacement_dataset']
        disps, _ = get_displacements_and_forces(dataset)
        displacements.append(disps)
    d = np.concatenate(displacements, axis=0)
    f = np.concatenate(forces, axis=0)

    idx = None
    if len(energies) == len(forces) and 'include_ratio' in data:
        all_energies = np.concatenate(energies)
        if len(all_energies) == len(f):
            ratio = data['include_ratio'].value
            if 0 < ratio < 1:
                num_include = int(np.ceil(ratio * len(all_energies)))
                if num_include > len(all_energies):
                    num_include = len(all_energies)
                idx = np.argsort(all_energies)[:num_include]
                d = d[idx]
                f = f[idx]

    phonon_setting_info = data['ph_info_1']
    smat = phonon_setting_info['supercell_matrix']
    ph = phonopy.load(unitcell=phonopy_atoms_from_structure(structure),
                      supercell_matrix=smat,
                      primitive_matrix='auto')
    ph.dataset = {'displacements': d, 'forces': f}
    ph.produce_force_constants(fc_calculator='alm')

    _modify_force_constants(ph)

    if 'random_seed' in data:
        _random_seed = data['random_seed'].value
    else:
        _random_seed = None

    ph.generate_displacements(number_of_snapshots=number_of_snapshots.value,
                              random_seed=_random_seed,
                              temperature=temperature.value)

    ret_dict = {'displacement_dataset': Dict(dict=ph.dataset)}

    if idx is not None:
        array = DataFactory('array')()
        array.set_array('supercell_energies', all_energies)
        array.set_array('included_supercell_indices', idx)
        ret_dict['supercell_energies'] = array

    return ret_dict
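The `include_ratio` filtering above keeps only the lowest-energy snapshots; a self-contained sketch of that selection logic with synthetic data:

    import numpy as np

    energies = np.array([3.2, 1.1, 2.7, 0.9, 4.0])
    displacements = np.arange(10).reshape(5, 2)  # one row per snapshot
    ratio = 0.6
    num_include = int(np.ceil(ratio * len(energies)))  # 3
    idx = np.argsort(energies)[:num_include]  # indices of the lowest energies
    print(displacements[idx])  # rows for the three lowest-energy snapshots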
Example #4
def get_force_constants(structure, phonon_settings, force_sets):
    params = {}
    phonon = get_phonopy_instance(structure, phonon_settings, params)
    phonon.dataset = phonon_settings['displacement_dataset']
    phonon.forces = force_sets.get_array('force_sets')
    phonon.produce_force_constants()
    force_constants = DataFactory('array')()
    force_constants.set_array('force_constants', phonon.force_constants)
    force_constants.set_array('p2s_map', phonon.primitive.p2s_map)
    force_constants.label = 'force_constants'

    return force_constants
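Reading the stored arrays back from the returned node (a sketch; the inputs are the same nodes used by the surrounding workflow):

    fc_node = get_force_constants(structure, phonon_settings, force_sets)
    force_constants = fc_node.get_array('force_constants')
    p2s_map = fc_node.get_array('p2s_map')
    print(force_constants.shape, p2s_map)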
Example #5
def store_total_energies(total_energies):
    """Stores the total energies in ArrayData to keep data provenance."""
    total_energies_list = total_energies.get_list()
    # Sort by volume, since the entries were picked from the structures at
    # random above.
    total_energies_array = np.array(total_energies_list)
    total_energies_array_sorted = total_energies_array[
        total_energies_array[:, 0].argsort()]
    array_data = DataFactory('array')()
    array_data.set_array('eos', total_energies_array_sorted)

    return array_data
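The sort above uses NumPy fancy indexing with `argsort` on the volume column; a self-contained illustration:

    import numpy as np

    eos = np.array([[12.0, -5.1], [10.0, -4.8], [11.0, -5.0]])  # volume, energy
    eos_sorted = eos[eos[:, 0].argsort()]
    print(eos_sorted)  # rows ordered by volume: 10.0, 11.0, 12.0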
Example #6
    def parse_dos(self, data):
        """Return ArrayData with densities of states.

        The array should have shape (nproj + 2, ne), where ne is the number
        of energy points and nproj is the number of DOS projections.
        """
        if not data:
            raise ValueError("Did not find DOS info in fort.25")

        from aiida.plugins import DataFactory
        array_data = DataFactory("array")()
        array = [data["e"], data["dos_up"]]
        if data['dos_down'] is not None:
            array.append(-1 * data['dos_down'])
        array_data.set_array("dos", np.vstack(array))
        return array_data
Example #7
def SavePositions(positions):
    """AiiDA calfunction to store positions
    
    Parameters
    ----------
    positions : numpy.ndarray
    
    Returns
    -------
    AiiDA node of stored data
    """
    positions = np.array(positions.get_array('structure_positions'))
    PossArray = DataFactory('array')()
    PossArray.set_array('structure_positions', positions)
    return PossArray
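A minimal usage sketch for `SavePositions` (the input node here is constructed by hand for illustration; in practice it comes from a previous workchain step):

    import numpy as np
    from aiida.plugins import DataFactory

    src = DataFactory('array')()
    src.set_array('structure_positions', np.zeros((4, 3)))
    stored = SavePositions(src)
    print(stored.get_array('structure_positions').shape)  # (4, 3)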
Example #8
    def _parse_time(self, wb):
        """Parse the content which is indexed by time."""

        # Fetch active sheet
        ws = wb.active

        # Load time as NumPy array using the parameters
        times_raw = [
            timestep[0].value for timestep in ws[self.parameters['time_range']]
        ]
        reference_time = times_raw[0]
        times = np.array([(timestep - reference_time).total_seconds()
                          for timestep in times_raw])

        # Load data as NumPy array using the parameter
        data = np.array([[i.value for i in j]
                         for j in ws[self.parameters['data_range']]])

        # Combine the time steps and data
        time_data = np.column_stack((times, data))

        # Load labels
        labels = [
            label.value for label in ws[self.parameters['label_range']][0]
        ]
        # Override with manual labels
        for key, item in self.parameters['manual_label'].items():
            labels.insert(key, item)
        # Check that we have labels for all data (only check first row/column)
        if time_data[0].shape[0] != len(labels):
            raise ValueError(
                'Some of the data is missing labels, please correct the supplied parameters.'
            )
        # Extract comments
        comments = [[str(i.value) for i in j]
                    for j in ws[self.parameters['comment_range']]]
        # Compose data and metadata nodes
        data = DataFactory('array')()
        data.set_array('content', time_data)
        metadata = DataFactory('dict')(dict={
            # Store the reference (first) time step; stringify for storage.
            'start_time': str(reference_time),
            'comments': comments,
            'labels': labels
        })

        return {'data': data, 'metadata': metadata}
Example #9
def get_force_sets(**forces_dict):
    """Collect force sets, and energies if present, from indexed outputs."""
    forces = []
    energies = []
    for i in range(len(forces_dict)):
        label = "forces_%03d" % (i + 1)
        if label in forces_dict:
            forces.append(forces_dict[label].get_array('final'))
        label = "misc_%03d" % (i + 1)
        if label in forces_dict:
            energies.append(
                forces_dict[label]['total_energies']['energy_no_entropy'])

    assert len(forces) == sum('forces' in k for k in forces_dict)

    force_sets = DataFactory('array')()
    force_sets.set_array('force_sets', np.array(forces))
    if energies:
        force_sets.set_array('energies', np.array(energies))
    force_sets.label = 'force_sets'
    return force_sets
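The keyword names encode a one-based, zero-padded index; a sketch of a typical call (the nodes are hypothetical outputs of prior force calculations):

    force_sets = get_force_sets(forces_001=forces_node_1,
                                misc_001=misc_node_1,
                                forces_002=forces_node_2,
                                misc_002=misc_node_2)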
Example #10
    def finalize(self):
        """
        Finalize the workchain.
        Take the contact field container and set it as an output of this workchain.
        """
        # Due to data provenance we cannot return AiiDA data containers that have
        # not been passed through a calcfunction, workfunction or a workchain. Create this now.
        #all_contact_field = store_all_contact_field(DataFactory('list')(list=self.ctx.all_contact_field))

        #dict_data = DataFactory('dict')(dict=self.ctx.structure_contact)

        field_dict = DataFactory('dict')(dict=self.ctx.contact_field_dict)
        positions = np.array(self.ctx.positions_array)
        PossArray = DataFactory('array')()
        PossArray.set_array('structure_positions', positions)

        # And then store the output on the workchain
        #self.out('structure_contact_field_dict', dict_data)
        self.out('contact_field_dict', SaveContactField(field_dict))
        self.out('structure_positions', SavePositions(PossArray))
Example #11
def get_data_from_node_id(node_id):
    """Collect the structure and NAC parameters or forces from an existing node."""
    n = load_node(node_id.value)
    if 'structure' in n.inputs:
        cell = phonopy_atoms_from_structure(n.inputs.structure)
        structure = phonopy_atoms_to_structure(cell)
    else:
        raise RuntimeError("Crystal structure could not be found.")

    if 'born_charges' in n.outputs and 'dielectrics' in n.outputs:
        born = DataFactory('array')()
        born.set_array('born_charges',
                       n.outputs.born_charges.get_array('born_charges'))
        born.label = 'born_charges'
        epsilon = DataFactory('array')()
        epsilon.set_array('epsilon',
                          n.outputs.dielectrics.get_array('epsilon'))
        epsilon.label = 'epsilon'
        return {
            'born_charges': born,
            'dielectrics': epsilon,
            'structure': structure
        }
    elif 'forces' in n.outputs:
        forces = DataFactory('array')()
        forces.set_array('final', n.outputs.forces.get_array('final'))
        forces.label = 'forces'
        return {'forces': forces, 'structure': structure}
    else:
        raise RuntimeError("Forces or NAC params were not found.")
Example #12
    def _parse(self, file_handle):
        """Parse the content of the data file as a NumPy array."""
        data = file_handle.readlines()
        data_no_comments = []
        comments = []
        labels = None

        try:
            separator = self.parameters['separator']
        except KeyError:
            separator = ' '

        try:
            comment_string = self.parameters['comment_string']
        except KeyError:
            comment_string = '#'

        for line in data:
            line = line.strip()
            if not line.startswith(comment_string):
                data_no_comments.append(line)
            else:
                comments.append(line)

        if self.parameters.get('labels'):
            labels = data_no_comments[0].split(separator)
            data_no_comments = data_no_comments[1:]
        # Convert to array
        array = string_to_float(data_no_comments, separator)

        # Compose data and metadata nodes
        data = DataFactory('array')()
        data.set_array('content', array)
        metadata = DataFactory('dict')(dict={
            'comments': comments,
            'labels': labels
        })

        return {'data': data, 'metadata': metadata}
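The helper `string_to_float` is not included in this snippet; a plausible minimal implementation, consistent with how it is called above (an assumption, not the plugin's actual code):

    import numpy as np

    def string_to_float(lines, separator):
        """Convert separated-value strings to a 2D float NumPy array."""
        # Hypothetical helper: the real implementation may differ.
        return np.array([[float(field) for field in line.split(separator)]
                         for line in lines])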
Example #13
def calculate_concentration_from_area(parameters_data, calibration_data, data):
    """Calculate the concentration from the area and the calibration."""
    import numpy as np
    channels = [
        item for item in data.get_arraynames() if item not in ('time', 'id')
    ]
    parameters = parameters_data.get_dict()
    concentration_data = DataFactory('array')()
    for index, channel in enumerate(channels):
        calibration = []
        calibration_species = []
        for item in calibration_data[index]:
            calibration.append(list(item.values())[0])
            calibration_species.append(list(item.keys())[0] + ' area')
        calibration = np.array(calibration)
        slicing_index = 0
        for item in parameters['data_layout'][index]:
            if 'concentration' in list(item.keys())[0]:
                slicing_index = slicing_index + 1
        area = data.get_array(channel)
        # concentration = area x calibration
        concentration = area[:, slicing_index:] * calibration
        concentration_data.set_array(channel, concentration)
    return concentration_data
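The `area[:, slicing_index:] * calibration` step relies on NumPy broadcasting of the sliced area columns against the per-species calibration factors; a self-contained illustration:

    import numpy as np

    area = np.array([[1.0, 10.0, 20.0],
                     [2.0, 30.0, 40.0]])  # first column: e.g. retention time
    calibration = np.array([0.5, 0.1])
    concentration = area[:, 1:] * calibration  # broadcast row-wise
    print(concentration)  # [[5.0, 2.0], [15.0, 4.0]]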
Example #14
def get_nac_params(born_charges, epsilon, nac_structure, **params):
    """Obtain Born effective charges and dielectric constants in primitive cell

    When Born effective charges and dielectric constants are calculated within
    phonopy workchain, those values are calculated in the primitive cell.
    However using immigrant, the cell may not be primitive cell and can be
    unit cell. In this case, conversion of data is necessary. This conversion
    needs information of the structure where those values were calcualted and
    the target primitive cell structure.

    Two kargs parameters
    primitive : StructureData
    symmetry_tolerance : Float

    """
    from phonopy.structure.symmetry import symmetrize_borns_and_epsilon

    borns = born_charges.get_array('born_charges')
    eps = epsilon.get_array('epsilon')

    nac_cell = phonopy_atoms_from_structure(nac_structure)
    kargs = {}
    if 'symmetry_tolerance' in params:
        kargs['symprec'] = params['symmetry_tolerance'].value
    if 'primitive' in params:
        pcell = phonopy_atoms_from_structure(params['primitive'])
        kargs['primitive'] = pcell
    borns_, epsilon_ = symmetrize_borns_and_epsilon(borns, eps, nac_cell,
                                                    **kargs)

    nac_params = DataFactory('array')()
    nac_params.set_array('born_charges', borns_)
    nac_params.set_array('epsilon', epsilon_)
    nac_params.label = 'born_charges & epsilon'

    return nac_params
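Reading the symmetrized NAC parameters back from the returned node (a sketch; `Float` is the standard AiiDA type and the inputs are the nodes from the surrounding workflow):

    from aiida.orm import Float

    nac = get_nac_params(born_charges, epsilon, nac_structure,
                         symmetry_tolerance=Float(1e-5))
    print(nac.get_array('born_charges').shape,
          nac.get_array('epsilon').shape)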
Example #15
    def _parse(self, file_handle):  # pylint: disable=too-many-locals
        """Parse the content of GC file as a NumPy array."""

        # Set the separator
        try:
            separator = self.parameters['separator']
        except KeyError:
            separator = ' '

        # Fetch comment and label ranges
        try:
            comment_range = self.parameters['comment_range']
        except KeyError:
            comment_range = None

        # Read content
        content = file_handle.readlines()
        comments = None
        labels = None

        # Fetch data layout
        data_layout = self.parameters['data_layout']

        # Fetch comments if specified
        shift_index = 0
        if comment_range:
            if '-' not in comment_range and ',' not in comment_range:
                # Only comments on one line
                comments = content[int(self.parameters['comment_range'])]
                shift_index = shift_index + 1
            else:
                raise NotImplementedError

        date_time = []
        labels = []
        data = []
        time_index = []
        num_channels = len(data_layout)
        num_fields = []
        for channel in range(num_channels):
            data.append([])
            # Fetch location of the time steps
            time_index.append([
                index for index, item in enumerate(data_layout[channel])
                if 'time' in item
            ])
            num_fields.append(len(data_layout[channel]))
            # Build labels
            labels.append(
                [list(item.keys())[0] for item in data_layout[channel]])

        # Check that only one time index is given
        if any(len(item) > 1 for item in time_index):
            raise ValueError(
                'More than one time entry per channel. Please correct the configuration.'
            )
        # Make sure we only have integers in the list (find a more clever way to do this)
        time_index = [item[0] for item in time_index]

        # Start extracting the actual data
        for line in content[self.parameters['data_start_line']:]:
            line = line.split(separator)
            # Replace all empty strings with a zero (later converted to float)
            line = ['0.0' if item == '' else item for item in line]
            start_channel_index = 0
            for channel in range(num_channels):
                if channel == 0:
                    # Fetch and remove time entries (assumed the same between channels)
                    date_time.append(
                        parser.parse(line.pop(time_index[channel]).strip(),
                                     fuzzy=True))
                else:
                    # For the other channels, remove time data
                    line.pop(start_channel_index + time_index[channel])
                # Convert from string to target data for each channel
                data[channel].append([
                    float(item)
                    for item in line[start_channel_index:start_channel_index +
                                     num_fields[channel]]
                ])
                start_channel_index = start_channel_index + num_fields[channel]

        # Calculate time difference for each step and store that instead of absolute times
        reference_time = date_time[0]
        date_time = [(time - reference_time).total_seconds()
                     for time in date_time]
        # Compose data, time and metadata nodes
        array = DataFactory('array')()
        for channel in range(num_channels):
            array.set_array('channel_' + str(channel + 1),
                            np.array(data[channel]))
        array.set_array('time', np.array(date_time))
        meta = DataFactory('dict')(dict={
            'start_time': str(reference_time),
            'comments': comments,
            'labels': labels
        })

        return {'data': array, 'metadata': meta}
Example #16
    def _parse(self, file_handle):  # pylint: disable=too-many-locals
        """Parse the content of GC file as a NumPy array."""

        # Set the separator
        try:
            separator = self.parameters['separator']
        except KeyError:
            separator = ' '

        # Fetch comment and label ranges
        try:
            comment_range = self.parameters['comment_range']
        except KeyError:
            comment_range = None

        # Read content
        content = file_handle.readlines()
        comments = None
        labels = None

        # Fetch data layout
        data_layout = self.parameters['data_layout']

        # Fetch comments if specified
        shift_index = 0
        if comment_range:
            if '-' not in comment_range and ',' not in comment_range:
                # Only comments on one line
                comments = content[int(self.parameters['comment_range'])]
                shift_index = shift_index + 1
            else:
                raise NotImplementedError

        date_time = []
        sample_id = []
        labels = []
        data = []
        time_index = []
        id_index = []
        ignore_index = []
        num_channels = len(data_layout)
        num_fields = []
        for channel in range(num_channels):
            data.append([])
            fields = 0
            for index, item in enumerate(data_layout[channel]):
                if 'time' in item:
                    time_index.append([index])
                elif 'id' in item:
                    id_index.append([index])
                elif 'ignore' in item:
                    ignore_index.append([index])
                else:
                    fields = fields + 1
            num_fields.append(fields)
            # Build labels
            labels.append([
                list(item.keys())[0] for item in data_layout[channel]
                if list(item.keys())[0] != 'time' and list(item.keys())[0] !=
                'id' and list(item.keys())[0] != 'ignore'
            ])
        # Check that only one time index is given
        if any(len(item) > 1 for item in time_index):
            raise ValueError(
                'More than one time entry per channel. Please correct the configuration.'
            )
        # Make sure we only have integers in the list (find a more clever way to do this)
        time_index = [item[0] for item in time_index]

        # Check that only one id index is given
        if any(len(item) > 1 for item in id_index):
            raise ValueError(
                'More than one id entry per channel. Please correct the configuration.'
            )
        # Make sure we only have integers in the list (find a more clever way to do this)
        id_index = [item[0] for item in id_index]

        # Start extracting the actual data
        for line in content[self.parameters['data_start_line']:]:
            line = line.split(separator)
            start_channel_index = 0
            for channel in range(num_channels):
                id_ind = id_index[channel]
                time_ind = time_index[channel]
                try:
                    # Remove ignore columns, but allow not having any
                    for ignore_ind in ignore_index[channel]:
                        line.pop(start_channel_index + ignore_ind)
                        if ignore_ind < id_ind:
                            id_ind = id_ind - 1
                        if ignore_ind < time_ind:
                            time_ind = time_ind - 1
                except IndexError:
                    pass
                if id_ind > time_ind:
                    id_ind = id_ind - 1
                if channel == 0:
                    # Fetch and remove time entries
                    date_time.append(
                        parser.parse(line.pop(time_ind).strip(), fuzzy=True))
                    # Fetch and remove id entries (assumed the same between channels)
                    try:
                        s_id = int(line.pop(id_ind).strip())
                    except ValueError:
                        s_id = 0
                    sample_id.append(s_id)
                else:
                    # For the other channels, remove time and id data
                    line.pop(start_channel_index + time_ind)
                    line.pop(start_channel_index + id_ind)
                # Convert from string to target data for each channel
                try:
                    data[channel].append([
                        float(item) for index, item in enumerate(
                            line[start_channel_index:start_channel_index +
                                 num_fields[channel]])
                    ])
                except ValueError as e:
                    raise ValueError(
                        'A field with an empty string might have been detected. Are you sure you have '
                        'specified correct ignore fields in the parameters?'
                    ) from e

                start_channel_index = start_channel_index + num_fields[channel]
        # Calculate time difference for each step and store that instead of absolute times
        reference_time = date_time[0]
        date_time = [(time - reference_time).total_seconds()
                     for time in date_time]
        # Compose data, time and metadata nodes
        array_data = DataFactory('array')()
        for channel in range(num_channels):
            array_data.set_array('channel_' + str(channel + 1),
                                 np.array(data[channel]))
        array_data.set_array('time', np.array(date_time))
        array_data.set_array('id', np.array(sample_id))
        meta = DataFactory('dict')(
            dict={
                # Consider replacing the string conversion in the future;
                # the problem is that we also need timezone information.
                'start_time': str(reference_time),
                'comments': comments,
                'labels': labels
            })
        return {'data': array_data, 'metadata': meta}