"""

# | - get_data_for_al

sys.path.insert(0, os.path.join(os.environ["PROJ_irox"], "data"))
from proj_data_irox import (ids_to_discard__too_many_atoms_path)

# | - Get all necessary dfs
# Request only the datasets used below; the commented-out names are
# datasets that are deliberately not loaded here.
df_dict = get_ml_dataframes(
    names=[
        "bulk_dft_data_path",
        "unique_ids_path",
        # "prototypes_data_path",
        "static_irox_structures_path",
        # "static_irox_structures_kirsten_path",
        # "oqmd_irox_data_path",
        "df_features_pre_opt_path",
        "df_features_pre_opt_kirsten_path",
        "df_features_post_opt_path",
        # "oer_bulk_structures_path",
        # "df_ccf_path",
        "df_dij_path",
        # "ids_to_discard__too_many_atoms_path",
    ],
)

# Each result is looked up under the requested name minus its "_path"
# suffix. `.get` yields None when a key is absent instead of raising.
df_ids = df_dict.get("unique_ids")

df_features_pre = df_dict.get("df_features_pre_opt")
# df_features_pre = df_dict.get("df_features_pre_opt_kirsten", None)
df_features_post = df_dict.get("df_features_post_opt")

# Keep only the bulk DFT rows whose `source` equals "raul". This step
# requires "bulk_dft_data" to be present (a None here would fail).
df_bulk_dft = df_dict.get("bulk_dft_data")
df_bulk_dft = df_bulk_dft[df_bulk_dft.source == "raul"]
# Example no. 2
0
# # Import Data

# +
# Restore the pickled object stored at AL_data_path.
# NOTE: unpickling can execute arbitrary code, so only load files that
# come from a trusted source.
with open(AL_data_path, "rb") as al_file:
    AL_i = pickle.load(al_file)

# #########################################################
# Request every dataset listed below in a single call.
DF_dict = get_ml_dataframes(
    names=[
        "bulk_dft_data_path",
        "unique_ids_path",
        "prototypes_data_path",
        "static_irox_structures_path",
        "static_irox_structures_kirsten_path",
        "oqmd_irox_data_path",
        "df_features_pre_opt_path",
        "df_features_pre_opt_kirsten_path",
        "df_features_post_opt_path",
        "oer_bulk_structures_path",
        "df_ccf_path",
        "df_dij_path",
        "ids_to_discard__too_many_atoms_path",
        "df_prototype_dft_path",
        "df_prototype_static_path",
    ],
)

# Only the bulk DFT data is pulled out of the result here.
df_bulk_dft = DF_dict["bulk_dft_data"]
# unique_ids = DF_dict["unique_ids"]
# prototypes_data = DF_dict["prototypes_data"]
# static_irox_structures = DF_dict["static_irox_structures"]
# static_irox_structures_kirsten = DF_dict["static_irox_structures_kirsten"]
# oqmd_irox_data = DF_dict["oqmd_irox_data"]
#     '949rnem5z2',
#     'mkmsvkcyc5',
#     'vwxfn3blxi',
#     'nrml6dms9l',
#     ]]

# +
# %%capture

# Put the project's ml_modelling workflow directory at the front of the
# import path; the import below presumably relies on this entry.
sys.path.insert(
    0,
    os.path.join(os.environ["PROJ_irox"], "workflow/ml_modelling"),
)

from ml_methods import get_ml_dataframes

# Load just the three datasets this cell needs.
DF_dict = get_ml_dataframes(
    names=[
        "static_irox_structures_path",
        "df_prototype_dft_path",
        "df_prototype_static_path",
    ],
)

# Unpack in the same order the datasets were requested.
static_irox_structures = DF_dict["static_irox_structures"]
df_prototype_dft = DF_dict["df_prototype_dft"]
df_prototype_static = DF_dict["df_prototype_static"]
# -

# #########################################################
import pickle
import os
path_i = os.path.join(
    os.environ["PROJ_irox"],
    "CatHub_MPContribs_upload/MPContribs_upload/duplicate_MP_entries",
# Example no. 4
0
# +
import os
print(os.getcwd())
import sys

import pandas as pd

# +
sys.path.insert(0,
                os.path.join(os.environ["PROJ_irox"], "workflow/ml_modelling"))

from ml_methods import get_ml_dataframes

# +
# `get_ml_dataframes` is called without a `names` argument here; every
# lookup below expects its key to be present in the returned mapping.
DF_dict = get_ml_dataframes()

# Bind each entry to a module-level name identical to its key.
bulk_dft_data = DF_dict["bulk_dft_data"]
unique_ids = DF_dict["unique_ids"]
prototypes_data = DF_dict["prototypes_data"]
static_irox_structures = DF_dict["static_irox_structures"]
static_irox_structures_kirsten = DF_dict["static_irox_structures_kirsten"]
oqmd_irox_data = DF_dict["oqmd_irox_data"]
df_features_pre_opt = DF_dict["df_features_pre_opt"]
df_features_pre_opt_kirsten = DF_dict["df_features_pre_opt_kirsten"]
df_features_post_opt = DF_dict["df_features_post_opt"]
oer_bulk_structures = DF_dict["oer_bulk_structures"]
df_ccf = DF_dict["df_ccf"]
df_dij = DF_dict["df_dij"]
ids_to_discard__too_many_atoms = DF_dict["ids_to_discard__too_many_atoms"]
from ase.db import connect
# -

# # Script Inputs

# Script input: name of the database file (not used within this cell).
filename = "FinalStructures_1.db"

# # Read df_bulk_dft dataframe

# +
# Put the project's ml_modelling workflow directory at the front of the
# import path; the import below presumably relies on this entry.
sys.path.insert(
    0,
    os.path.join(os.environ["PROJ_irox"], "workflow/ml_modelling"),
)
from ml_methods import get_ml_dataframes

# Only the final DFT dataset is requested.
DF_dict = get_ml_dataframes(names=["df_dft_final_final_path"])

df_bulk_dft = DF_dict["df_dft_final_final"]

# `df_i` is a second name for the same object, not a copy.
df_i = df_bulk_dft

# +
# df_i = df_i.loc[[
#     'vovgximhm2',
#     '8dce6kz2vf',
#     'vhv39q6e9j',
#     '8ymh8qnl6o',
#     '6fcdbh9fz2',
#     '7qm56wxj8s',
#     'mu6omk6k9l',