def setUp(self):
        """Prepare a scratch folder, configs and random image/label fixtures.

        Generates 100 random crystal-class labels and two random arrays of
        shape (100, 64, 64, 3) standing in for the pristine and vacancy
        ("vac25") inputs; both share the same integer-encoded labels.
        """
        # scratch directory used as the main folder for the configs
        self.main_folder = tempfile.mkdtemp()
        self.configs = set_configs(main_folder=self.main_folder)

        self.dataset_folder = '/home/ziletti/Documents/calc_nomadml/2d_nature_comm/datasets_2d/'

        n_samples = 100
        crystal_classes = [
            'bct139', 'bct142', 'rh/hex', 'sc', 'fcc', 'diam', 'bcc'
        ]

        # draw one random class name per sample
        drawn_labels = []
        for _ in range(n_samples):
            drawn_labels.append(random.choice(crystal_classes))
        self.text_labels = np.array(drawn_labels)

        # random inputs; the pristine array is drawn first, then the defective one
        self.x_pristine = np.random.rand(n_samples, 64, 64, 3)
        self.x_vac25 = np.random.rand(n_samples, 64, 64, 3)

        # map the class names to integer labels
        encoder = preprocessing.LabelEncoder()
        encoder.fit(self.text_labels)
        self.y_pristine = encoder.transform(self.text_labels)

        # pristine and defective samples share the very same label array
        self.y_vac25 = self.y_pristine
Beispiel #2
0
    def setUp(self):
        """Create a scratch main folder, build the configs and resolve the
        path of the prdf_binaries.tar.gz data file."""
        self.main_folder = tempfile.mkdtemp()
        self.configs = set_configs(main_folder=self.main_folder)

        # NOTE(review): this resource path starts with '/', unlike other
        # get_data_filename calls in these examples - confirm it is intended.
        self.prdf_binaries_desc_file = get_data_filename(
            '/data/descriptors_data/prdf_binaries.tar.gz')
Beispiel #3
0
    def setUp(self):
        """Create scratch folders, build the configs and resolve the model files.

        Stores the architecture (.json) and weights (.h5) paths of the rgb
        and of the greyscale convolutional neural network on the instance.
        """
        self.main_folder = tempfile.mkdtemp()
        self.figure_folder = tempfile.mkdtemp()
        self.configs = set_configs(main_folder=self.main_folder)

        # (attribute name, data resource) pairs, resolved in this order
        model_resources = (
            # rgb convolutional neural network
            ('model_arch_file', 'data/nn_models/ziletti_et_2018_rgb.json'),
            ('model_weights_file', 'data/nn_models/ziletti_et_2018_rgb.h5'),
            # grayscale convolutional neural network
            ('model_arch_file_greyscale',
             'data/nn_models/ziletti_diff3d_temp1.json'),
            ('model_weights_file_greyscale',
             'data/nn_models/ziletti_diff3d_temp1.h5'),
        )
        resolved = [(attribute, get_data_filename(resource))
                    for attribute, resource in model_resources]
        for attribute, path in resolved:
            setattr(self, attribute, path)
    def setUp(self):
        """Create a scratch main folder, build the configs, resolve the viewer
        data files and read the binaries ASE database."""
        self.main_folder = tempfile.mkdtemp()
        self.configs = set_configs(main_folder=self.main_folder)

        ase_db_path = get_data_filename(
            'data/db_ase/binaries_lowest_energy_ghiringhelli2015.json')

        # (attribute name, data resource) pairs, resolved in this order
        viewer_resources = (
            ('results_binaries_lasso',
             'data/viewer_files/l1_l0_dim2_for_viewer.csv'),
            ('results_metal_non_metal',
             'data/viewer_files/tutorial_metal_non_metal_2017.csv'),
            ('results_topological_ins',
             'data/viewer_files/tutorial_topological_insulators_2017.csv'),
            ('control_file_binaries',
             'data/viewer_files/binaries_control.json'),
        )
        resolved = [(attribute, get_data_filename(resource))
                    for attribute, resource in viewer_resources]

        # the database is read only after every path has been resolved
        self.ase_atoms_binaries = read_ase_db(db_path=ase_db_path)
        for attribute, path in resolved:
            setattr(self, attribute, path)
Beispiel #5
0
 def setUp(self):
     """Create a scratch main folder and build the configs that point to it."""
     self.main_folder = tempfile.mkdtemp()
     self.configs = set_configs(main_folder=self.main_folder)
Beispiel #6
0
from ai4materials.dataprocessing.preprocessing import load_dataset_from_file
from ai4materials.dataprocessing.preprocessing import prepare_dataset_STEM  ### YBC
#from ai4materials.descriptors.diffraction2d import Diffraction2D
from ai4materials.utils.utils_config import set_configs
from ai4materials.utils.utils_config import setup_logger
from ai4materials.utils.utils_crystals import create_supercell
from ai4materials.utils.utils_crystals import create_vacancies
from ai4materials.wrappers import calc_descriptor
from ai4materials.wrappers import load_descriptor
import os.path
from sklearn import preprocessing
import numpy as np
from PIL import Image

# Build the configs rooted at the current directory and set up logging.
configs = set_configs(main_folder='./')
logger = setup_logger(configs, display_configs=False, level='INFO')

# Dataset location ('my_datasets' below the main folder) and descriptor file name.
desc_file_name = 'fcc_bcc_hcp_example'
dataset_folder = os.path.join(configs['io']['main_folder'], 'my_datasets')

# Start with empty image and target lists.
images_list, targets_list = [], []

f_list = open("list_shuf_test.txt", 'r')
while True:
    line = f_list.readline()

    if not line:
Beispiel #7
0
from functools import partial
from ai4materials.utils.utils_config import set_configs
from ai4materials.dataprocessing.preprocessing import load_dataset_from_file
from ai4materials.models.cnn_architectures import cnn_nature_comm_ziletti2018
from ai4materials.models.cnn_architectures import cnn_architecture_ai4STEM # YBC
from ai4materials.models.cnn_nature_comm_ziletti2018 import load_datasets
from ai4materials.models.STEM_CNN_segmentation import train_neural_network #YBC
from ai4materials.utils.utils_config import setup_logger
from sklearn import preprocessing
import numpy as np
import os

# Default configs with DEBUG logging; datasets live in 'my_datasets' below
# the main folder.
configs = set_configs()
logger = setup_logger(configs, display_configs=False, level='DEBUG')
dataset_folder = os.path.join(configs['io']['main_folder'], 'my_datasets')

# =============================================================================
# Paths of the training and test set files
# =============================================================================

# The load_datasets call is left disabled; file paths are built by hand instead:
#x_pristine, y_pristine, dataset_info_pristine, x_vac25, y_vac25, dataset_info_vac25 = load_datasets(dataset_folder)

# training set: '_x.pkl', '_y.pkl' and '_summary.json' files
train_set_name = 'STEM_monocrystalline_train'
path_to_x_pristine, path_to_y_pristine, path_to_summary_pristine = (
    os.path.join(dataset_folder, train_set_name + suffix)
    for suffix in ('_x.pkl', '_y.pkl', '_summary.json'))

# test set: '_x.pkl' and '_y.pkl' files
test_set_name = 'STEM_monocrystalline_test'
path_to_x_vac25, path_to_y_vac25 = (
    os.path.join(dataset_folder, test_set_name + suffix)
    for suffix in ('_x.pkl', '_y.pkl'))
Beispiel #8
0
    import matplotlib.pyplot as plt
    from ai4materials.utils.utils_config import set_configs
    from ai4materials.utils.utils_config import setup_logger
    from ai4materials.utils.utils_data_retrieval import read_ase_db
    from ai4materials.wrappers import load_descriptor
    from ai4materials.wrappers import calc_model
    from ai4materials.wrappers import calc_descriptor
    from ai4materials.descriptors.atomic_features import AtomicFeatures
    from ai4materials.descriptors.atomic_features import get_table_atomic_features
    from ai4materials.utils.utils_config import get_data_filename
    from ai4materials.visualization.viewer import read_control_file
    import numpy as np
    import pandas as pd

    # modify this path if you want to save the calculation results in another location
    configs = set_configs(main_folder='./l1_l0_example')
    logger = setup_logger(configs, level='INFO')

    # setup folder and files: both output files are placed directly in the
    # configured main folder
    # NOTE(review): `os` is not among the imports visible in this fragment -
    # confirm it is imported before this point.
    lookup_file = os.path.join(configs['io']['main_folder'], 'lookup.dat')
    materials_map_plot_file = os.path.join(configs['io']['main_folder'],
                                           'binaries_l1_l0_map_prl2015.png')

    # define descriptor - atomic features in this case; the unit choices are
    # forwarded to AtomicFeatures as keyword arguments
    kwargs = {'energy_unit': 'eV', 'length_unit': 'angstrom'}
    descriptor = AtomicFeatures(configs=configs, **kwargs)

    # =============================================================================
    # Descriptor calculation
    # =============================================================================
def _validate_calc_local_inputs(geometry_files, stride, box_size,
                                padding_ratio, desc_filename,
                                adjust_box_size_by_number_of_atoms,
                                criterion):
    """Check the per-file arguments of calc_local and fill the default padding.

    Returns the padding ratios to use: the given ``padding_ratio`` or, if it
    is None, one ``[1.0, 1.0, 1.0]`` entry per geometry file.
    Raises ValueError for any inconsistent argument.
    """
    # A plain string has a non-zero length, so it must be rejected explicitly.
    if isinstance(geometry_files, str) or len(geometry_files) == 0:
        raise ValueError(
            "No geometry files specified - or only passed as string and not as list."
        )

    if desc_filename is not None:
        # Both conditions must hold: a sequence AND one entry per geometry file.
        if (not isinstance(desc_filename, (list, tuple))
                or len(desc_filename) < len(geometry_files)):
            raise ValueError(
                "If specify desc files, specifiy them as list containing at least len(geometry_files) entries."
            )

    if isinstance(stride, (int, float)) or isinstance(box_size, (int, float)):
        raise ValueError(
            "Please specify stride and box size as list of floats.")

    if padding_ratio is None:
        padding_ratio = [[1.0, 1.0, 1.0] for _ in geometry_files]

    parameters_to_check = (('stride', stride), ('box_size', box_size),
                           ('padding_ratio', padding_ratio))
    for key, parameter in parameters_to_check:
        print('Test parameter {}'.format(key))
        if not len(parameter) == len(geometry_files):
            raise ValueError(
                "Parameter {} needs to be list of same length as geometry_files."
                .format(key))

    if adjust_box_size_by_number_of_atoms and criterion not in ('median',
                                                                'average'):
        raise ValueError(
            "criterion must be 'median' or 'average', got {!r}.".format(
                criterion))

    return padding_ratio


def _create_run_directory(directory):
    """Create ``directory``, or the first free ``directory_run_<k>`` (k >= 2)
    if it already exists, and return the path that was created."""
    if os.path.exists(directory):
        # Never delete earlier results: pick a fresh numbered sibling instead.
        run = 2
        while os.path.exists(directory + '_run_' + str(run)):
            run += 1
        directory = directory + '_run_' + str(run)
    os.makedirs(directory)
    return directory


def _grow_box_size_to_min_atoms(structure_file, min_n_atoms, criterion):
    """Increase the box size in steps of 1 until the median (or the average,
    for ``criterion='average'``) number of atoms per box reaches
    ``min_n_atoms``; return the resulting box size."""
    summarize = np.mean if criterion == 'average' else np.median

    box_size_step_size = 1
    max_spread = 10
    box_size = 0
    current_natoms = 0
    current_spread = max_spread * 2
    counter = 0

    start_time = time.time()
    # NOTE(review): there is no upper bound on the box size; if the criterion
    # can never reach min_n_atoms for this structure the loop does not stop.
    while current_natoms < min_n_atoms:
        counter += 1
        print("Iteration {}".format(counter))
        box_size += box_size_step_size
        _, number_of_atoms_xyz = get_boxes_from_xyz(
            structure_file,
            sliding_volume=[box_size, box_size, box_size],
            stride_size=[4.0, 4.0, 4.0],
            give_atom_density=True,
            plot_atom_density=False,
            padding_ratio=[0.0, 0.0, 0.0])

        atom_counts = np.array(number_of_atoms_xyz).flatten()
        current_natoms = summarize(atom_counts)
        current_spread = np.std(atom_counts)
        print("Mean Natoms = {}, spread = {} ".format(current_natoms,
                                                      current_spread))

    print("Final box size = {} with natoms mean = {} and spread = {}".format(
        box_size, current_natoms, current_spread))
    end_time = time.time()
    print("--- %s seconds ---" % (end_time - start_time))
    return box_size


def calc_local(geometry_files,
               box_size,
               stride,
               configs,
               padding_ratio=None,
               min_atoms=3,
               adjust_box_size_by_number_of_atoms=False,
               min_n_atoms=100,
               criterion='median',
               min_atoms_spm=50,
               model_file=None,
               path_to_summary_train=None,
               descriptor=None,
               mc_samples=1000,
               plot_results=False,
               desc_filename=None,
               nb_jobs=-1):
    """
    Run the strided-pattern-matching classification for each geometry file.

    For every file a new result folder is created below
    configs['io']['main_folder'], descriptors are computed box by box via
    make_strided_pattern_matching_dataset, get_classification_map is run on
    them, and the saved probabilities and uncertainty arrays are loaded back.

    geometry_files: list
        list of geometry files

    box_size: list
        list of box size values (float) to be used for each geometry file.

    stride: list
        list of list of strides to be used for each geometry file.

    configs: dict
        configs; configs['io']['main_folder'] is the base folder in which
        the per-structure result folders are created.

    padding_ratio: list, optional (default=None)
        list of 1D lists, where each element specifies the
        amount of empty space (relative to the box size, i.e.,
        taking values in [0,1]) that is appended
        at the boundaries. Choosing this to a size
        of 0.5-1.0 typically suffices.
        For the default setting, a padding of 1.0 * box_size
        is used for each spatial dimension.
        The padding is set to 0.0 along every dimension in which the
        structure is not larger than the box size. The given lists are
        not modified.

    min_atoms: int, optional (default=3)
        Minimum number of atoms contained in each box
        for which a descriptor will be calculated.

    adjust_box_size_by_number_of_atoms: boolean, optional (default=False)
        Determine if the box size is automatically tuned
        such that at least 'min_n_atoms' are contained in each box.
        The keyword 'criterion' fixes if the mean or the median of
        the number of atoms is at least 'min_n_atoms'.

    min_n_atoms: int,  optional (default=100)
        If adjust_box_size_by_number_of_atoms=True, this number is
        used to increase the box size until at least min_n_atoms
        atoms are contained in each box based on the criterion fixed
        via the keyword 'criterion'.

    criterion: string, optional (default='median')
        If adjust_box_size_by_number_of_atoms = True, the box size will
        be increased until at least min_n_atoms atoms are contained either
        according to the average (criterion='average') or the
        median (criterion='median').

    min_atoms_spm: int, optional (default=50)
        Passed as min_nb_atoms to make_strided_pattern_matching_dataset.

    model_file: path to h5 file, optional (default=None)
        If None, then the model used in Leitherer et. al. 2021 will be used.

    path_to_summary_train: optional (default=None)
        Forwarded unchanged to get_classification_map.

    descriptor: object, optional (default=None)
        If None, the quippy SOAP descriptor will be employed automatically
        with the standard settings used in Leitherer et. al. 2021.

    mc_samples: int, optional (default=1000)
        Number of Monte Carlo samples to calculate uncertainty estimate.

    plot_results: boolean, optional (default=False)
        Decide whether to automatically generate svg files for visual analysis.

    desc_filename: list, optional (default=None)
        One descriptor file per geometry file (same order); each entry is
        passed as desc_file to make_strided_pattern_matching_dataset.

    nb_jobs: int (default=-1)
        Number of CPUs used for parallel calculation.

    Returns
    -------
    predictions: list
        For each geometry file, the array loaded from the
        '<file>_probabilities.npy' result file.

    uncertainty: list
        For each geometry file, a dict with the arrays loaded for the keys
        'mutual_information', 'variation_ratio' and 'predictive_entropy'.

    Raises
    ------
    ValueError
        If geometry_files is empty or a string, if stride, box_size,
        padding_ratio or desc_filename do not match geometry_files, or if
        criterion is unknown while the box size adjustment is enabled.
    """
    # Validate the cheap arguments first, before any file or model is touched.
    padding_ratios = _validate_calc_local_inputs(
        geometry_files, stride, box_size, padding_ratio, desc_filename,
        adjust_box_size_by_number_of_atoms, criterion)
    strides = stride
    box_sizes = box_size

    if model_file is None:
        model_file = get_data_filename(
            'data/nn_models/AI_SYM_Leitherer_et_al_2021.h5')

    base_folder = configs['io']['main_folder']
    checkpoint_dir = os.path.dirname(model_file)
    checkpoint_filename = os.path.basename(model_file)

    predictions = []
    uncertainty = []
    per_file_arguments = zip(geometry_files, strides, box_sizes,
                             padding_ratios)
    for geom_file_id, (structure_file, stride_size, current_box_size,
                       current_padding) in enumerate(per_file_arguments):
        print('Structure file {}'.format(structure_file))
        appendix_to_folder = '_box_' + str(current_box_size) + '_stride_' + str(
            stride_size)

        # atoms scaling chosen automatically here to include the maximal information -> may provide that as
        # as an option in the future.
        atoms_scaling_cutoffs = [
            current_box_size, current_box_size * 2, current_box_size * 3
        ]

        # file name without its last four characters (e.g. '.xyz')
        structure_name = os.path.basename(structure_file)[:-4]
        main_folder = _create_run_directory(
            os.path.join(base_folder, structure_name + appendix_to_folder))

        # read config file
        configs_new = set_configs(main_folder=main_folder)
        dataset_folder = os.path.abspath(
            os.path.normpath(os.path.join(main_folder, 'datasets')))
        conf_matrix_file = os.path.abspath(
            os.path.normpath(os.path.join(main_folder,
                                          'confusion_matrix.png')))
        configs_new['io']['dataset_folder'] = dataset_folder

        if adjust_box_size_by_number_of_atoms:
            # In the future: refine this part: start from large box and large stride, then make it finer to get more reasonable
            # number of atoms, i.e., start with large box, also make it smaller if exceed the number of atoms!
            current_box_size = _grow_box_size_to_min_atoms(
                structure_file, min_n_atoms, criterion)

        # adjust padding ratio for slab structures: no padding along any
        # dimension in which the structure is not larger than the box
        polycrystal_structure = read(structure_file, ':', 'xyz')[0]
        positions = polycrystal_structure.positions
        # work on a copy so that the caller's list is left untouched
        current_padding = list(current_padding)
        for dim in range(3):
            positions_current_dim = positions[:, dim]
            extension_current_dim = abs(
                np.max(positions_current_dim) - np.min(positions_current_dim))
            if extension_current_dim <= current_box_size:
                current_padding[dim] = 0.0
        print("Final stride = {}, final padding ratio = {}".format(
            stride_size, current_padding))

        # Descriptor
        # NOTE(review): the default descriptor is built once, for the first
        # geometry file, and reused afterwards - later files therefore keep
        # the atoms_scaling_cutoffs of the first one. Confirm this is intended.
        if descriptor is None:
            descriptor = quippy_SOAP_descriptor(
                configs=configs_new,
                p_b_c=False,
                cutoff=4.0,
                l_max=6,
                n_max=9,
                atom_sigma=0.1,
                central_weight=0.0,
                average=True,
                average_over_permuations=False,
                number_averages=200,
                atoms_scaling='quantile_nn',
                atoms_scaling_cutoffs=atoms_scaling_cutoffs,
                extrinsic_scale_factor=1.0,
                n_Z=1,
                Z=1,
                n_species=1,
                species_Z=1,
                scale_element_sensitive=True,
                return_binary_descriptor=True,
                average_binary_descriptor=True,
                min_atoms=min_atoms,
                shape_soap=316,
                constrain_nn_distances=False)

        descriptor.configs = configs_new  # important! otherwise descriptors will be calculated in desc file of first geometry file

        desc_filename_to_load = None
        if desc_filename is not None:
            desc_filename_to_load = desc_filename[geom_file_id]

        log_file_path = os.path.join(main_folder,
                                     structure_name + '_log_file.txt')
        # the context manager closes the log file even if a step below fails
        with open(log_file_path, 'w') as save_file:
            start = time.time()
            (path_to_x_test, path_to_y_test, path_to_summary_test,
             path_to_strided_pattern_pos
             ) = make_strided_pattern_matching_dataset(
                 polycrystal_file=structure_file,
                 descriptor=descriptor,
                 desc_metadata='SOAP_descriptor',
                 configs=configs_new,
                 operations_on_structure=None,
                 stride_size=stride_size,
                 box_size=current_box_size,
                 init_sliding_volume=None,
                 desc_file=desc_filename_to_load,
                 desc_only=False,
                 show_plot_lengths=False,
                 desc_file_suffix_name='',
                 nb_jobs=nb_jobs,
                 padding_ratio=current_padding,
                 min_nb_atoms=min_atoms_spm)
            end = time.time()
            ex_time = str(end - start)
            print('Execution time descriptor calculation: ' + ex_time)
            save_file.write('Runtime crystal' + structure_file + ' ' + ex_time)

            configs_new['io']['polycrystal_file'] = os.path.basename(
                structure_file)

            start = time.time()
            get_classification_map(configs_new,
                                   path_to_x_test,
                                   path_to_y_test,
                                   path_to_summary_test,
                                   path_to_strided_pattern_pos,
                                   checkpoint_dir,
                                   checkpoint_filename=checkpoint_filename,
                                   mc_samples=mc_samples,
                                   interpolation='none',
                                   results_file=None,
                                   calc_uncertainty=True,
                                   conf_matrix_file=conf_matrix_file,
                                   train_set_name='soap_pristine_data',
                                   cmap_uncertainty='hot',
                                   interpolation_uncertainty='none',
                                   plot_results=plot_results,
                                   path_to_summary_train=path_to_summary_train)
            end = time.time()
            prediction_str = 'Time for predicting ' + str(end -
                                                          start) + ' s \n'
            save_file.write(prediction_str)
            save_file.write('Box size ' + str(current_box_size) +
                            ', stride_size ' + str(stride_size) +
                            ' padding_ratio ' + str(current_padding) +
                            ' min_atoms for quippy: ' + str(min_atoms) +
                            ' minatoms SPM ' + str(min_atoms_spm) +
                            ' cutoff_for_scaling ' +
                            str(atoms_scaling_cutoffs))

        # load and append predictions and uncertainty
        results_folder = configs_new['io']['results_folder']
        result_prefix = configs_new['io']['polycrystal_file']
        predictions.append(
            np.load(
                os.path.join(results_folder,
                             result_prefix + '_probabilities.npy')))
        uncertainty.append({
            key: np.load(
                os.path.join(results_folder,
                             result_prefix + '_' + key + '.npy'))
            for key in ('mutual_information', 'variation_ratio',
                        'predictive_entropy')
        })

        print('Clean tmp folder')
        clean_folder(configs_new['io']['tmp_folder'],
                     endings_to_delete=(".png", ".npy", "_target.json",
                                        "_aims.in", "_ase_atoms_info.pkl",
                                        "_ase_atoms.json", "_coord.in"))

    return predictions, uncertainty
Beispiel #10
0
    def test_read_configs_empty_file(self):
        """set_configs() called without arguments must return a dict."""
        self.assertIsInstance(set_configs(), dict)
    # machine = 'eos'

    # Pick the machine-specific locations of the config file, the main folder
    # and the prototype folders.
    # NOTE(review): all of these are hard-coded absolute paths of specific
    # machines/users; `machine` and `now` are defined outside this fragment.
    if machine == 'eos':
        config_file = '/scratch/ziang/diff_3d/config_prototypes.yml'
        main_folder = '/scratch/ziang/diff_3d/'
        prototypes_basedir = '/scratch/ziang/diff_3d/prototypes_aflow_new/'
        db_files_prototypes_basedir = '/scratch/ziang/diff_3d/db_ase_prototypes'

    else:
        config_file = '/home/ziletti/Documents/calc_nomadml/rot_inv_3d/config_diff3d.yml'
        main_folder = '/home/ziletti/Documents/calc_nomadml/rot_inv_3d/'
        prototypes_basedir = '/home/ziletti/Documents/calc_nomadml/rot_inv_3d/prototypes_aflow_new'
        db_files_prototypes_basedir = '/home/ziletti/Documents/calc_nomadml/rot_inv_3d/db_ase_prototypes'

    # read config file
    configs = set_configs(main_folder=main_folder)
    logger = setup_logger(configs, level='INFO', display_configs=False)

    # setup folder and files: absolute, normalized paths below the main folder
    # (results_file used to be assigned three times with the same value; it is
    # now assigned once)
    dataset_folder = os.path.abspath(os.path.normpath(os.path.join(main_folder, 'datasets')))
    checkpoint_dir = os.path.abspath(os.path.normpath(os.path.join(main_folder, 'saved_models')))
    figure_dir = os.path.abspath(os.path.normpath(os.path.join(main_folder, 'attentive_resp_maps')))
    conf_matrix_file = os.path.abspath(os.path.normpath(os.path.join(main_folder, 'confusion_matrix.png')))
    results_file = os.path.abspath(os.path.normpath(os.path.join(main_folder, 'results.csv')))
    lookup_file = os.path.abspath(os.path.normpath(os.path.join(main_folder, 'lookup.dat')))
    control_file = os.path.abspath(os.path.normpath(os.path.join(main_folder, 'control.json')))
    filtered_file = os.path.abspath(os.path.normpath(os.path.join(main_folder, 'filtered_file.json')))
    # the training log is stored next to the saved models, tagged with the
    # ISO timestamp of `now`
    training_log_file = os.path.abspath(
        os.path.normpath(os.path.join(checkpoint_dir, 'training_' + str(now.isoformat()) + '.log')))