예제 #1
0
def load_double_perovskites_gap(return_lumo=False,
                                data_home=None,
                                download_if_missing=True):
    """
    Convenience function for loading the double_perovskites_gap dataset.

    Args:
        return_lumo (bool): Whether or not to provide LUMO energy dataframe in
            addition to gap dataframe. Defaults to False.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame, tuple) The gap dataframe on its own, or a
        (gap, lumo) tuple of dataframes when return_lumo is True.
    """
    # Forward the storage options for the gap table too, so that both tables
    # honor data_home and download_if_missing.
    df = load_dataset("double_perovskites_gap", data_home,
                      download_if_missing)

    if return_lumo:
        lumo = load_dataset("double_perovskites_gap_lumo", data_home,
                            download_if_missing)
        return df, lumo

    return df
예제 #2
0
def featurize(task, n_jobs=1):
    """Featurize a dataset with MODNet and save the resulting MODData.

    Args:
        task (str): Name of the dataset passed to ``load_dataset``. The
            special value "matbench_elastic" joins the "matbench_log_gvrh"
            and "matbench_log_kvrh" datasets into a single frame.
        n_jobs (int): Forwarded to ``MODData.featurize``.

    Returns:
        The featurized ``MODData`` object, which is also written to
        ``./precomputed/{task}_moddata.pkl.gz``.

    Raises:
        RuntimeError: If the dataset has neither a "structure" nor a
            "composition" column.
    """
    import warnings

    warnings.filterwarnings("ignore", category=RuntimeWarning)
    from modnet.preprocessing import MODData
    from modnet.featurizers.presets import DeBreuck2020Featurizer
    from matminer.datasets import load_dataset

    if task == "matbench_elastic":
        df_g = load_dataset("matbench_log_gvrh")
        df_k = load_dataset("matbench_log_kvrh")
        # Both frames carry a "structure" column; keep only one copy.
        df = df_g.join(df_k.drop("structure", axis=1))
    else:
        df = load_dataset(task)

    # Normalize column names: spaces become underscores, parentheses go away.
    mapping = {
        col: col.replace(" ", "_").replace("(", "").replace(")", "")
        for col in df.columns
    }
    df.rename(columns=mapping, inplace=True)

    # Every column that is not an identifier or a material is a target.
    targets = [
        col for col in df.columns
        if col not in ("id", "structure", "composition")
    ]

    has_structure = "structure" in df.columns
    if has_structure:
        featurizer = DeBreuck2020Featurizer(fast_oxid=True)
    else:
        featurizer = CompositionOnlyFeaturizer()

    try:
        if has_structure:
            materials = df["structure"]
        else:
            materials = df["composition"].map(Composition)
    except KeyError as err:
        raise RuntimeError(
            f"Could not find any materials data dataset for task {task!r}!"
        ) from err

    data = MODData(
        materials=materials.tolist(),
        targets=df[targets].values,
        target_names=targets,
        featurizer=featurizer,
    )
    data.featurize(n_jobs=n_jobs)
    os.makedirs("./precomputed", exist_ok=True)
    data.save(f"./precomputed/{task}_moddata.pkl.gz")
    return data
예제 #3
0
def simple_violin():
    """
    Draw violin plots of the K_Reuss, K_Voigt, G_Reuss and G_Voigt columns
    of the elastic_tensor_2015 dataset using the 'Reds' colorscale.
    """
    elastic = load_dataset("elastic_tensor_2015")
    fig = PlotlyFig(elastic,
                    title="Distribution of Elastic Constant Averages",
                    colorscale='Reds')
    columns = ['K_Reuss', 'K_Voigt', 'G_Reuss', 'G_Voigt']
    fig.violin(cols=columns, use_colorscale=True)
예제 #4
0
def load_citrine_thermal_conductivity(room_temperature=True, data_home=None,
                                      download_if_missing=True):
    """
    Load the citrine_thermal_conductivity dataset, optionally restricted to
    room temperature entries.

    Args:
        room_temperature (bool): If True (the default), keep only rows whose
            k_condition is one of the room temperature labels listed below.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
           it isn't on disk

    Returns: (pd.DataFrame) The dataset without the 'k-units', 'k_condition'
        and 'k_condition_units' columns.
    """
    room_temp_labels = ['room temperature', 'Room temperature', 'Standard',
                        '298', '300']
    condition_columns = ['k-units', 'k_condition', 'k_condition_units']

    data = load_dataset("citrine_thermal_conductivity", data_home,
                        download_if_missing)

    if room_temperature:
        at_room_temp = data['k_condition'].isin(room_temp_labels)
        data = data[at_room_temp]

    return data.drop(condition_columns, axis=1)
예제 #5
0
def load_glass_ternary_hipt(system="all",
                            data_home=None,
                            download_if_missing=True):
    """
    Load the glass_ternary_hipt dataset, optionally restricted to some
    systems.

    Args:
        system (str, list): "all" (the default) for every row, otherwise one
            system name or a list of them. Accepted names are:
            "CoFeZr", "CoTiZr", "CoVZr", "FeTiNb"

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)

    Raises:
        AttributeError: if any requested system is not one of the accepted
            names
    """
    data = load_dataset("glass_ternary_hipt", data_home, download_if_missing)

    if system != "all":
        # A single name is treated as a one-element list.
        requested = [system] if isinstance(system, str) else system
        known_systems = {"CoFeZr", "CoTiZr", "CoVZr", "FeTiNb"}

        if any(name not in known_systems for name in requested):
            raise AttributeError("some of the system list {} are not "
                                 "in this dataset".format(requested))
        data = data[data["system"].isin(requested)]

    return data
예제 #6
0
def load_citrine_thermal_conductivity(room_temperature=True,
                                      data_home=None,
                                      download_if_missing=True):
    """
    Load the citrine_thermal_conductivity dataset, optionally restricted to
    room temperature entries.

    Args:
        room_temperature (bool): If True (the default), keep only rows whose
            k_condition is one of the room temperature labels listed below.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
           it isn't on disk

    Returns: (pd.DataFrame) The dataset without the 'k-units', 'k_condition'
        and 'k_condition_units' columns.
    """
    room_temp_labels = ['room temperature', 'Room temperature', 'Standard',
                        '298', '300']
    condition_columns = ['k-units', 'k_condition', 'k_condition_units']

    data = load_dataset("citrine_thermal_conductivity", data_home,
                        download_if_missing)

    if room_temperature:
        at_room_temp = data['k_condition'].isin(room_temp_labels)
        data = data[at_room_temp]

    return data.drop(condition_columns, axis=1)
예제 #7
0
def load_glass_ternary_landolt(processing="all",
                               unique_composition=True,
                               data_home=None,
                               download_if_missing=True):
    """
    Load the glass_ternary_landolt dataset, optionally filtered by processing
    method and collapsed to one row per formula.

    Args:
        processing (str): "all" (the default) keeps every row; "sputtering"
            or "meltspin" keeps only rows with that processing value

        unique_composition (bool): If True, group rows by formula and keep
            the per-group maximum of each remaining column

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)

    Raises:
        ValueError: if processing is not "all", "meltspin" or "sputtering"
    """
    data = load_dataset("glass_ternary_landolt", data_home,
                        download_if_missing)

    if processing != "all":
        if processing not in {"meltspin", "sputtering"}:
            raise ValueError("Error, processing method unrecognized")
        matches_method = data["processing"] == processing
        data = data[matches_method]

    if unique_composition:
        data = data.groupby("formula").max().reset_index()

    return data
예제 #8
0
def load_elastic_tensor(version="2015",
                        include_metadata=False,
                        data_home=None,
                        download_if_missing=True):
    """
    Load one version of the elastic_tensor dataset.

    Args:
        version (str): Version suffix of the dataset; the dataset loaded is
            "elastic_tensor_<version>" (defaults to 2015)

        include_metadata (bool): Whether or not to keep the cif,
            kpoint_density and poscar columns. False by default.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)
    """
    dataset_name = "_".join(["elastic_tensor", version])
    data = load_dataset(dataset_name, data_home, download_if_missing)

    if include_metadata:
        return data

    return data.drop(['cif', 'kpoint_density', 'poscar'], axis=1)
예제 #9
0
def load_glass_ternary_landolt(processing="all", unique_composition=True,
                               data_home=None, download_if_missing=True):
    """
    Load the glass_ternary_landolt dataset, optionally filtered by processing
    method and collapsed to one row per formula.

    Args:
        processing (str): "all" (the default) keeps every row; "sputtering"
            or "meltspin" keeps only rows with that processing value

        unique_composition (bool): If True, group rows by formula and keep
            the per-group maximum of each remaining column

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)

    Raises:
        ValueError: if processing is not "all", "meltspin" or "sputtering"
    """
    data = load_dataset("glass_ternary_landolt", data_home,
                        download_if_missing)

    if processing != "all":
        if processing not in {"meltspin", "sputtering"}:
            raise ValueError("Error, processing method unrecognized")
        matches_method = data["processing"] == processing
        data = data[matches_method]

    if unique_composition:
        data = data.groupby("formula").max().reset_index()

    return data
예제 #10
0
def load_glass_ternary_hipt(system="all", data_home=None,
                            download_if_missing=True):
    """
    Load the glass_ternary_hipt dataset, optionally restricted to some
    systems.

    Args:
        system (str, list): "all" (the default) for every row, otherwise one
            system name or a list of them. Accepted names are:
            "CoFeZr", "CoTiZr", "CoVZr", "FeTiNb"

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)

    Raises:
        AttributeError: if any requested system is not one of the accepted
            names
    """
    data = load_dataset("glass_ternary_hipt", data_home, download_if_missing)

    if system != "all":
        # A single name is treated as a one-element list.
        requested = [system] if isinstance(system, str) else system
        known_systems = {"CoFeZr", "CoTiZr", "CoVZr", "FeTiNb"}

        if any(name not in known_systems for name in requested):
            raise AttributeError("some of the system list {} are not "
                                 "in this dataset".format(requested))
        data = data[data["system"].isin(requested)]

    return data
예제 #11
0
def load_elastic_tensor(version="2015", include_metadata=False, data_home=None,
                        download_if_missing=True):
    """
    Load one version of the elastic_tensor dataset.

    Args:
        version (str): Version suffix of the dataset; the dataset loaded is
            "elastic_tensor_<version>" (defaults to 2015)

        include_metadata (bool): Whether or not to keep the cif,
            kpoint_density and poscar columns. False by default.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)
    """
    dataset_name = "_".join(["elastic_tensor", version])
    data = load_dataset(dataset_name, data_home, download_if_missing)

    if include_metadata:
        return data

    return data.drop(['cif', 'kpoint_density', 'poscar'], axis=1)
예제 #12
0
def load_jarvis_dft_2d(drop_nan_columns=None,
                       data_home=None,
                       download_if_missing=True):
    """
    Load the jarvis_dft_2d dataset, optionally dropping rows with NaN values
    in selected columns.

    Args:
        drop_nan_columns (list, str): Column or columns in which a NaN value
            causes the row to be dropped. None (the default) is passed on as
            an empty column list.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
           it isn't on disk

    Returns: (pd.DataFrame)
    """
    data = load_dataset("jarvis_dft_2d", data_home, download_if_missing)

    # Normalize the argument to the list form passed to dropna.
    if drop_nan_columns is None:
        nan_subset = []
    elif isinstance(drop_nan_columns, str):
        nan_subset = [drop_nan_columns]
    else:
        nan_subset = drop_nan_columns

    return data.dropna(subset=nan_subset)
예제 #13
0
def advanced_histogram():
    """
    This is a work in progress

    Plot histograms of the G_Reuss, G_VRH and G_Voigt columns of the
    elastic_tensor_2015 dataset with a bin size of 10.
    """

    # The G_* shear modulus columns belong to the elastic tensor dataset;
    # the dielectric_constant dataset does not provide them.
    df = load_dataset("elastic_tensor_2015")
    pf = PlotlyFig(df, title="Various Histograms")
    pf.histogram(cols=['G_Reuss', 'G_VRH', 'G_Voigt'], bins={'size': 10})
예제 #14
0
def load_brgoch_superhard_training(subset="all",
                                   drop_suspect=False,
                                   data_home=None,
                                   download_if_missing=True):
    """
    Convenience function for loading the brgoch_superhard_training dataset.

    Args:
        subset (str): Identifier for subset of data to return,
            all: all possible columns including metadata, engineered features,
                 and basic descriptors
            brgoch_features: only features from reference paper and targets
            basic_descriptors: only composition/structure columns and targets

        drop_suspect (bool): Whether to drop values with possibly incorrect
            elastic data and materials that could not be verified

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)

    Raises:
        ValueError: if subset is not one of the identifiers listed above
    """
    if subset not in {"all", "brgoch_features", "basic_descriptors"}:
        raise ValueError("Error: dataset subset identifier {} "
                         "not recognized".format(subset))

    df = load_dataset("brgoch_superhard_training", data_home,
                      download_if_missing)

    if drop_suspect:
        # Copy so the column assignments below write to an independent frame
        # instead of a filtered selection of the loaded one.
        df = df[~df["suspect_value"]].copy()

    if subset in {"all", "brgoch_features"}:
        feats_expanded = pd.DataFrame(list(df["brgoch_feats"]))

        # Assign by position: feats_expanded has a fresh 0..n-1 index, while
        # df's index can have gaps after filtering, so index-aligned
        # assignment would attach features to the wrong rows.
        for column in feats_expanded.columns:
            df[column] = feats_expanded[column].values

    if subset == "basic_descriptors":
        keep = {"composition", "structure", "shear_modulus", "bulk_modulus"}
        df = df.drop([feat for feat in df.columns if feat not in keep],
                     axis=1)
    elif subset == "brgoch_features":
        df = df.drop([
            "composition", "structure", "formula", "material_id",
            "suspect_value", "brgoch_feats"
        ],
                     axis=1)
    return df
예제 #15
0
def advanced_bar():
    """
    Bar chart comparing the 'n' and 'band_gap' columns of the first 5 rows
    of the dielectric_constant dataset, labeled by formula.
    """
    # Only the first 5 samples go into the figure.
    samples = load_dataset("dielectric_constant").iloc[:5]
    pf = PlotlyFig(df=samples,
                   title='Comparison of 5 materials band gaps and n')

    bar_colors = ['red', 'orange', 'yellow', 'blue', 'green']
    pf.bar(cols=['n', 'band_gap'], labels='formula', colors=bar_colors)
예제 #16
0
def load_mp(include_structures=False, data_home=None, download_if_missing=True):
    """
    Load the materials project dataset, with or without structures.

    Args:
        include_structures (bool): If True load the 'mp_all' dataset,
            otherwise load 'mp_nostruct'. False by default.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
           it isn't on disk

    Returns: (pd.DataFrame)
    """
    dataset_name = 'mp_all' if include_structures else 'mp_nostruct'
    return load_dataset(dataset_name, data_home, download_if_missing)
예제 #17
0
def load_data(nrows):
    """
    Sample rows from the matbench_expt_is_metal dataset and featurize them
    with the Meredig featurizer.

    Args:
        nrows (int): Number of rows to sample (fixed random_state of 42)

    Returns: (pd.DataFrame) The featurized sample, with 'is_metal' converted
        to 1/0 and a 'Metal vs Nonmetal' label column added.
    """
    sampled = load_dataset("matbench_expt_is_metal").sample(nrows,
                                                            random_state=42)

    # Temporary column of Composition objects for the featurizer to read.
    sampled["pmg_composition"] = sampled["composition"].apply(
        lambda formula: Composition(formula))

    featurized = Meredig().featurize_dataframe(df=sampled,
                                               col_id="pmg_composition",
                                               ignore_errors=True,
                                               pbar=False)
    featurized = featurized.drop(columns=["pmg_composition"])

    featurized["is_metal"] = featurized["is_metal"].apply(
        lambda flag: 1 if flag else 0)
    featurized["Metal vs Nonmetal"] = featurized["is_metal"].apply(
        lambda flag: "Metal" if flag else "Nonmetal")
    return featurized
예제 #18
0
def basic_histogram():
    """
    Plot a basic histogram of the 'band_gap' column of the matminer
    dielectric constant dataset (originally taken from Petousis et al.,
    2017).
    """
    band_gaps = load_dataset("dielectric_constant")['band_gap']
    figure_title = ("Distribution of Band Gaps in the Dielectric Constant "
                    "Dataset")
    pf = PlotlyFig(title=figure_title,
                   x_title="Band Gap (eV)",
                   hoverinfo='y')
    pf.histogram(band_gaps)
예제 #19
0
파일: conftest.py 프로젝트: CompRhys/aviary
def df_matbench_phonons():
    """Return the Matbench phonons dataset as a dataframe with added columns.

    Added columns: "lattice" and "sites" (from get_cgcnn_input on each
    structure), "material_id", "composition" (structure formula without
    spaces) and "phdos_clf" (1 where "last phdos peak" exceeds 450, else 0).
    """
    phonons = load_dataset("matbench_phonons")

    cgcnn_inputs = [get_cgcnn_input(struct) for struct in phonons.structure]
    phonons[["lattice", "sites"]] = cgcnn_inputs

    n_rows = len(phonons)
    phonons["material_id"] = [f"mb_phdos_{idx}" for idx in range(n_rows)]

    formulas = (struct.composition.formula for struct in phonons.structure)
    phonons["composition"] = [formula.replace(" ", "") for formula in formulas]

    phonons["phdos_clf"] = [
        1 if peak > 450 else 0 for peak in phonons["last phdos peak"]
    ]

    return phonons
예제 #20
0
def load_mp(include_structures=False,
            data_home=None,
            download_if_missing=True):
    """
    Load the materials project dataset, with or without structures.

    Args:
        include_structures (bool): If True load the 'mp_all' dataset,
            otherwise load 'mp_nostruct'. False by default.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
           it isn't on disk

    Returns: (pd.DataFrame)
    """
    dataset_name = 'mp_all' if include_structures else 'mp_nostruct'
    return load_dataset(dataset_name, data_home, download_if_missing)
예제 #21
0
def load_double_perovskites_gap(return_lumo=False, data_home=None,
                                download_if_missing=True):
    """
    Convenience function for loading the double_perovskites_gap dataset.

    Args:
        return_lumo (bool): Whether or not to provide LUMO energy dataframe in
            addition to gap dataframe. Defaults to False.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame, tuple) The gap dataframe on its own, or a
        (gap, lumo) tuple of dataframes when return_lumo is True.
    """
    # Forward the storage options for the gap table too, so that both tables
    # honor data_home and download_if_missing.
    df = load_dataset("double_perovskites_gap", data_home,
                      download_if_missing)

    if return_lumo:
        lumo = load_dataset("double_perovskites_gap_lumo", data_home,
                            download_if_missing)
        return df, lumo

    return df
예제 #22
0
def load_flla(data_home=None, download_if_missing=True):
    """
    Load the flla dataset.

    Args:
        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)
    """
    return load_dataset("flla", data_home, download_if_missing)
예제 #23
0
def load_flla(data_home=None, download_if_missing=True):
    """
    Load the flla dataset.

    Args:
        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)
    """
    return load_dataset("flla", data_home, download_if_missing)
예제 #24
0
def load_double_perovskites_gap_lumo(data_home=None, download_if_missing=True):
    """
    Load the double_perovskites_gap_lumo dataset.

    Args:
        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)
    """
    return load_dataset("double_perovskites_gap_lumo", data_home,
                        download_if_missing)
예제 #25
0
def load_brgoch_superhard_training(subset="all", drop_suspect=False,
                                   data_home=None, download_if_missing=True):
    """
    Convenience function for loading the brgoch_superhard_training dataset.

    Args:
        subset (str): Identifier for subset of data to return,
            all: all possible columns including metadata, engineered features,
                 and basic descriptors
            brgoch_features: only features from reference paper and targets
            basic_descriptors: only composition/structure columns and targets

        drop_suspect (bool): Whether to drop values with possibly incorrect
            elastic data and materials that could not be verified

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)

    Raises:
        ValueError: if subset is not one of the identifiers listed above
    """
    if subset not in {"all", "brgoch_features", "basic_descriptors"}:
        raise ValueError("Error: dataset subset identifier {} "
                         "not recognized".format(subset))

    df = load_dataset("brgoch_superhard_training", data_home,
                      download_if_missing)

    if drop_suspect:
        # Copy so the column assignments below write to an independent frame
        # instead of a filtered selection of the loaded one.
        df = df[~df["suspect_value"]].copy()

    if subset in {"all", "brgoch_features"}:
        feats_expanded = pd.DataFrame(list(df["brgoch_feats"]))

        # Assign by position: feats_expanded has a fresh 0..n-1 index, while
        # df's index can have gaps after filtering, so index-aligned
        # assignment would attach features to the wrong rows.
        for column in feats_expanded.columns:
            df[column] = feats_expanded[column].values

    if subset == "basic_descriptors":
        keep = {"composition", "structure", "shear_modulus", "bulk_modulus"}
        df = df.drop([feat for feat in df.columns if feat not in keep],
                     axis=1)
    elif subset == "brgoch_features":
        df = df.drop(["composition", "structure", "formula", "material_id",
                      "suspect_value", "brgoch_feats"], axis=1)
    return df
예제 #26
0
def formatting_example(api_key, username):
    """
    Demonstrate common and advanced formatting features of PlotlyFig.

    Formatting options are first passed to the PlotlyFig initializer, a
    figure is drawn, then titles are changed on the existing object with
    set_arguments and the same figure is drawn again.

    Args:
        api_key (str): Plotly API key, forwarded to PlotlyFig
        username (str): Plotly username, forwarded to PlotlyFig

    Raises:
        ValueError: if api_key or username is missing (falsy)
    """
    if not (api_key and username):
        raise ValueError("Specify your Plotly api_key and username!")

    elastic = load_dataset("elastic_tensor_2015")

    # Options handed to the initializer.
    figure_options = dict(
        api_key=api_key,
        username=username,
        mode='online',
        title='Comparison of Bulk Modulus and Shear Modulus',
        x_title='Shear modulus (GPa)',
        y_title='Bulk modulus (GPa)',
        colorbar_title='Poisson Ratio',
        fontfamily='Raleway',
        fontscale=0.75,
        fontcolor='#283747',
        ticksize=30,
        colorscale="Reds",
        hovercolor='white',
        hoverinfo='text',
        bgcolor='#F4F6F6',
        margins=110,
        pad=10,
    )
    pf = PlotlyFig(df=elastic, **figure_options)

    xy_options = dict(labels='material_id', colors='poisson_ratio')
    pf.xy(('G_VRH', 'K_VRH'), **xy_options)

    # We can also use LaTeX if we use Plotly online/static
    latex_titles = dict(
        title="$\\text{Origin of Poisson Ratio } \\nu $",
        y_title='$K_{VRH} \\text{(GPa)}$',
        x_title='$G_{VRH} \\text{(GPa)}$',
        colorbar_title='$\\nu$',
    )
    pf.set_arguments(**latex_titles)
    pf.xy(('G_VRH', 'K_VRH'), **xy_options)
예제 #27
0
def load(dataset_name, dataset_metadata=mbv01_metadata):
    """
    Load a matbench dataset into memory as a dataframe.
    This function is simply a wrapper around the matminer dataloader.

    Each matbench dataset is completely self contained.

    See https://hackingmaterials.lbl.gov/matbench/ for a list of dataset names.
    For example, "matbench_jdft2d".

    Args:
        dataset_name (str): A matbench dataset name, as defined in the
            datasets.json.
        dataset_metadata: Mapping from dataset name to a metadata entry with
            ``input_type`` and ``target`` attributes. Defaults to
            mbv01_metadata.

    Returns:
        (pandas.DataFrame): The dataset, indexed by MBID_KEY and containing
            two columns:
            - Inputs, either compositions or pymatgen structure objects.
            - Outputs, either a float (for regression) or a boolean (for
                classification).

    Raises:
        KeyError: If dataset_name is not a key of dataset_metadata.
    """

    if dataset_name not in dataset_metadata:
        raise KeyError(
            f"Dataset name {dataset_name} not recognized by matbench. "
            f"Please see https://hackingmaterials.lbl.gov/matbench for "
            f"a list of the dataset names, or choose from:"
            f"\n{list(dataset_metadata.keys())}")
    logger.debug(f"Loading {dataset_name} into memory; please be patient as "
                 f"loading many structures can take a while to serialize.")
    df = load_dataset(dataset_name)

    # Zero-pad ids to the digit count of the row count. Counting characters
    # is exact, whereas floor(log(n, 10)) + 1 under-counts at exact powers of
    # ten (math.log(1000, 10) is 2.9999999999999996) and raises on n == 0.
    id_n_zeros = len(str(df.shape[0]))
    mpcontribs_prefix = dataset_name.replace("matbench",
                                             "mb").replace("_", "-")
    df[MBID_KEY] = [
        f"{mpcontribs_prefix}-{i + 1:0{id_n_zeros}d}" for i in df.index
    ]

    df = df.set_index(MBID_KEY)
    metadata = dataset_metadata[dataset_name]
    df = df[[metadata.input_type, metadata.target]]

    return df
예제 #28
0
def plot_bulk_shear_moduli():
    """
    Basic xy scatter plot of the K_VRH column (bulk modulus) against the
    G_VRH column (shear modulus) of the elastic_tensor_2015 dataset. Points
    are labeled by material_id and colored by poisson_ratio, and the x axis
    is limited to (0, 300).

    Returns:
        None; the figure is produced through PlotlyFig.xy.
    """
    elastic = load_dataset("elastic_tensor_2015")
    axis_titles = {'y_title': 'Bulk Modulus (GPa)',
                   'x_title': 'Shear Modulus (GPa)'}
    pf = PlotlyFig(elastic, filename='bulk_shear_moduli.jpeg', **axis_titles)

    x_range = {'x': (0, 300)}
    pf.xy(('G_VRH', 'K_VRH'),
          labels='material_id',
          colors='poisson_ratio',
          colorscale='Picnic',
          limits=x_range)
예제 #29
0
def plot_scatter_matrix():
    """
    Draw three scatter matrix plots from the elastic_tensor_2015 dataset:
    a basic one over four columns, the same with colors and labels, and one
    where the whole dataframe is passed to a second, smaller-font figure.

    Returns:
        None; the figures are produced through PlotlyFig.scatter_matrix.
    """
    elastic = load_dataset("elastic_tensor_2015")
    selected = ('K_VRH', 'G_VRH', 'nsites', 'volume')

    first_fig = PlotlyFig(elastic)

    # basic matrix:
    first_fig.scatter_matrix(cols=list(selected))

    # with colorscale and labels:
    first_fig.scatter_matrix(cols=list(selected),
                             colors='nsites',
                             labels='material_id',
                             colorscale='Picnic')

    # with the whole dataframe passed in (note the change in sizes):
    second_fig = PlotlyFig(filename='scatter_matrix_elastic', fontscale=0.6)
    second_fig.scatter_matrix(elastic, marker_scale=0.6)
예제 #30
0
def load_dielectric_constant(include_metadata=False, data_home=None,
                             download_if_missing=True):
    """
    Load the dielectric_constant dataset.

    Args:
        include_metadata (bool): Whether or not to keep the cif, meta and
            poscar columns. False by default.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)
    """
    data = load_dataset("dielectric_constant", data_home, download_if_missing)

    if include_metadata:
        return data

    return data.drop(['cif', 'meta', 'poscar'], axis=1)
예제 #31
0
def plot_mean_elastic_tensors():
    """
    Heatmap of the mean of each elastic tensor in the elastic_tensor_2015
    dataset against crystal system and density.

    The 'crystal_system' and 'density' columns are added by featurizing the
    'structure' column with GlobalSymmetryFeatures and DensityFeatures.

    Returns:
        None; the figure is produced through PlotlyFig.heatmap_df.
    """
    elastic = load_dataset("elastic_tensor_2015")

    # data preparation: one scalar per tensor, then the two featurizers
    elastic['Mean Elastic Constant'] = elastic['elastic_tensor'].apply(
        lambda tensor: np.mean(tensor))
    symmetry = GlobalSymmetryFeatures(desired_features=['crystal_system'])
    elastic = symmetry.featurize_dataframe(elastic, col_id='structure')
    density = DensityFeatures(desired_features=['density'])
    elastic = density.featurize_dataframe(elastic, col_id='structure')

    # actual plotting
    heatmap_columns = ['crystal_system', 'density', 'Mean Elastic Constant']
    pf = PlotlyFig(fontscale=0.75,
                   filename='static_elastic_constants',
                   colorscale='RdBu')
    pf.heatmap_df(elastic[heatmap_columns])
예제 #32
0
def load_dielectric_constant(include_metadata=False,
                             data_home=None,
                             download_if_missing=True):
    """
    Load the dielectric_constant dataset.

    Args:
        include_metadata (bool): Whether or not to keep the cif, meta and
            poscar columns. False by default.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)
    """
    data = load_dataset("dielectric_constant", data_home, download_if_missing)

    if include_metadata:
        return data

    return data.drop(['cif', 'meta', 'poscar'], axis=1)
예제 #33
0
def load_glass_binary(version="v2", data_home=None, download_if_missing=True):
    """
    Load one version of the glass_binary dataset.

    Args:
        version (str): Version identifier for dataset, see dataset description
            for explanation of each. "v1" loads "glass_binary"; any other
            value loads "glass_binary_<version>". Defaults to v2

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)
    """
    # Only non-v1 versions carry a version tag in the dataset name.
    name_parts = ["glass_binary"]
    if version != "v1":
        name_parts.append(version)

    return load_dataset("_".join(name_parts), data_home, download_if_missing)
예제 #34
0
def load_glass_binary(version="v2", data_home=None, download_if_missing=True):
    """
    Load one version of the glass_binary dataset.

    Args:
        version (str): Version identifier for dataset, see dataset description
            for explanation of each. "v1" loads "glass_binary"; any other
            value loads "glass_binary_<version>". Defaults to v2

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
            it isn't on disk

    Returns: (pd.DataFrame)
    """
    # Only non-v1 versions carry a version tag in the dataset name.
    name_parts = ["glass_binary"]
    if version != "v1":
        name_parts.append(version)

    return load_dataset("_".join(name_parts), data_home, download_if_missing)
예제 #35
0
def load_jarvis_dft_2d(drop_nan_columns=None, data_home=None,
                       download_if_missing=True):
    """
    Load the jarvis_dft_2d dataset, optionally dropping rows with NaN values
    in selected columns.

    Args:
        drop_nan_columns (list, str): Column or columns in which a NaN value
            causes the row to be dropped. None (the default) is passed on as
            an empty column list.

        data_home (str, None): Where to look for and store the loaded dataset

        download_if_missing (bool): Whether or not to download the dataset if
           it isn't on disk

    Returns: (pd.DataFrame)
    """
    data = load_dataset("jarvis_dft_2d", data_home, download_if_missing)

    # Normalize the argument to the list form passed to dropna.
    if drop_nan_columns is None:
        nan_subset = []
    elif isinstance(drop_nan_columns, str):
        nan_subset = [drop_nan_columns]
    else:
        nan_subset = drop_nan_columns

    return data.dropna(subset=nan_subset)
예제 #36
0

https://ml.materialsproject.org/projects/matbench_dielectric
"""

# %%
import matplotlib.pyplot as plt
import pandas as pd
from matminer.datasets import load_dataset
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatviz import ptable_heatmap, spacegroup_hist
from tqdm import tqdm

# %%
# tqdm.pandas() registers `progress_apply` on pandas objects; it is used
# further down in this script.
tqdm.pandas()
# Parenthesised walrus: binds `dielectric` while keeping the statement an
# expression — presumably so a notebook cell also displays the dataframe.
(dielectric := load_dataset("matbench_dielectric"))

# %%
# Histogram of the "n" column (50 bins, log=True), saved as a PDF.
# NOTE(review): the output filename mentions "last-dos-peak", which does not
# match the plotted column "n" — looks like a copy-paste leftover; confirm.
dielectric.hist(column="n", bins=50, log=True)
plt.savefig("dielectric-last-dos-peak-hist.pdf")

# %%
# Derive per-row volume and formula columns from each entry of `structure`.
dielectric["volume"] = dielectric.structure.apply(lambda cryst: cryst.volume)
dielectric["formula"] = dielectric.structure.apply(lambda cryst: cryst.formula)

# Periodic-table heatmap built from the formula column (log=True), saved as
# a PDF.
# NOTE(review): the title string below misspells "dielectric" as "dieletric".
ptable_heatmap(dielectric.formula, log=True)
plt.title("Elemental prevalence in the Matbench dieletric dataset")
plt.savefig("dielectric-elements-log.pdf")

# %%
dielectric[["sg_symbol", "sg_number"]] = dielectric.progress_apply(
예제 #37
0
#!/usr/bin/env python
# coding: utf-8

# In[1]:

# UROP Phase 1: data retrieval and partial preprocessing.
# All data comes from the Materials Project database.

import os

from matminer.datasets import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval

# SECURITY: the API key must not be hard-coded in source control. It is read
# from the PMG_MAPI_KEY environment variable instead. When the variable is
# unset, None is passed and MPDataRetrieval is expected to fall back to its own
# default key lookup (the pymatgen settings) — confirm for the installed
# matminer/pymatgen versions.
# NOTE(review): the key previously committed here should be revoked.
mpdr = MPDataRetrieval(api_key=os.environ.get("PMG_MAPI_KEY"))

# Load the packaged Heusler dataset and keep its formula column, which drives
# the Materials Project queries below.
d = load_dataset("heusler_magnetic")
heusler_formula = d['formula']

# In[ ]:

import pandas as pd
import time

# Module-level accumulators for the query loop below. Only their
# initialisation is visible in this excerpt; how they are filled is not shown.
query_time = list()
false_list = list()
true_list = list()
data = list()
heusler_matrix = pd.DataFrame()
start_time = time.time()
# One retrieval call per Heusler formula, with wall-clock timestamps taken
# immediately before (t_1) and after (t_2) the call.
for name in heusler_formula:
    t_1 = time.time()
    # The formula string itself is used as the query criteria.
    data_got = mpdr.get_data(
        criteria=name,
        properties=['pretty_formula', 'structure', 'elasticity'])
    t_2 = time.time()
# the Materials Project database, and the dataframe is filtered for stability
# and structure size.
FABER = not args.use_mp  # use the packaged "flla" dataset unless use_mp is set
FILTER = not FABER  # filtering is enabled only when FABER is off
NJOBS = args.njobs

# Print parameters.
print("REMOVE UNSTABLE ENTRIES:", FILTER)
print("USE FABER DATASET:", FABER)
print("USE TERNARY OXIDE DATASET:", not FABER)
print("NUMBER OF JOBS:", NJOBS)
print("DEBUG MODE:", args.debug)

# Set up dataset
if FABER:
    df = load_dataset("flla")
else:
    # Initialize data retrieval class
    # (no api_key is passed here, so the class's default key lookup is used)
    from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
    mpr = MPDataRetrieval()
    # Two wildcard elements plus oxygen — presumably selects the ternary
    # oxides announced in the parameter printout above; confirm.
    criteria = "*-*-O"
    # Choose list of properties to retrieve
    properties = [
        'structure', 'nsites', 'formation_energy_per_atom', 'e_above_hull'
    ]
    # Get the dataframe with the matching structure from the Materials Project
    df = mpr.get_dataframe(criteria=criteria, properties=properties)
    # Create the formation_energy feature for the SCM regression, since the SCM
    # model learns formation energy per unit cell rather than per atom.
    df['formation_energy'] = df['formation_energy_per_atom'] * df['nsites']
    # Structures are retrieved as dictionaries but can easily be converted to
예제 #39
0
def plot_modes(api_key, username):
    """
    Walk through the plotting modes of PlotlyFig and the simplest way to
    switch between them.

    The same scatter plot (poisson_ratio vs. elastic_anisotropy from the
    elastic_tensor_2015 dataset) is produced once per mode:

    Offline - "mode" set to "offline"
        Writes a local html file. Note that offline mode in plotly disables
        LaTeX and some fonts on some systems by default; use online mode for
        the full-featured Plotly experience.

    Static - "mode" set to "static"
        Writes a single image file. Height and width control the image size.
        api_key and username are required for this mode.

    Online - "mode" set to "online"
        Opens the figure in the Plotly online module.

    Notebook - "mode" set to "notebook"
        Opens the figure in a Jupyter/IPython notebook. Not demonstrated
        here; see the matminer_examples repository.

    Return - "return_plot=True" passed to any plotting method
        Returns the figure as a 'bare-bones' dictionary, which can be edited
        and passed to 'create_plot' of PlotlyFig or used directly with plotly.

    Args:
        api_key (str): Plotly API key.
        username (str): Plotly username.

    Raises:
        ValueError: If either api_key or username is missing/empty.
    """

    if not (api_key and username):
        raise ValueError("Specify your Plotly api_key and username!")

    # The column pair plotted in every example below; a fresh list is built
    # around it for each call.
    xy_columns = ('poisson_ratio', 'elastic_anisotropy')

    elastic_df = load_dataset("elastic_tensor_2015")

    # General figure setup, shared by all the plots that follow.
    figure_options = dict(title='Elastic data', mode='offline',
                          x_scale='log', y_scale='log')
    pf = PlotlyFig(elastic_df, **figure_options)

    # 1) Offline (the default): an html file is created.
    pf.xy([xy_columns], labels='formula')

    # 2) Offline again, but with a custom filename and without opening the
    #    result. 'set_arguments' adjusts the existing figure template.
    pf.set_arguments(show_offline_plot=False, filename="myplot.html")
    pf.xy([xy_columns], labels='formula')

    # 3) Static image. The image size could be changed through the 'height'
    #    and 'width' arguments of set_arguments.
    static_options = dict(mode='static',
                          api_key=api_key,
                          username=username,
                          filename="my_PlotlyFig_plot.jpeg")
    pf.set_arguments(**static_options)
    pf.xy([xy_columns], labels='formula')

    # 4) Plotly online interface.
    pf.set_arguments(mode='online')
    pf.xy([xy_columns], labels='formula')

    # 5) Fetch the figure as a plain python dictionary. The plot mode does not
    #    need to change: any plotting method accepts 'return_plot=True'.
    fig = pf.xy([xy_columns], labels='formula', return_plot=True)
    print("Here's our returned figure!")
    pprint.pprint(fig)

    # Tweak the returned figure and render it with the current mode (online).
    layout = fig['layout']
    layout['hoverlabel']['bgcolor'] = 'pink'
    layout['title'] = 'My Custom Elastic Data Figure'
    pf.create_plot(fig)
 def setUp(self):
     """Load elastic_tensor_2015 with "formula" renamed to "composition"."""
     elastic_df = load_dataset('elastic_tensor_2015')
     self.test_df = elastic_df.rename(columns={"formula": "composition"})
예제 #41
0
        else:
            raise ValueError(f"Worker {worker} not valid!")

        o_utf = o.decode("utf-8")
        o_all = o_utf.split("\n")
        o_all.remove("")
        password = o_all[-1]

        ssh = SSHClient()
        ssh.load_system_host_keys()
        ssh.connect(host,
                    username=user,
                    password=password,
                    look_for_keys=False)

        with SCPClient(ssh.get_transport()) as scp:
            scp.put(filepath,
                    recursive=True,
                    remote_path="/global/home/users/ardunn")
    else:
        pass


if __name__ == "__main__":
    # Script entry point: load the matbench_jdft2d dataframe and hand it to
    # transfer_data together with the "lrc" target and a time string.
    import pandas as pd
    from matminer.datasets import load_dataset
    from automatminer_dev.workflows.util import get_time_str

    df = load_dataset("matbench_jdft2d")
    time_str = get_time_str()
    transfer_data(df, "lrc", time_str)