data_dict_i["slab_id"] = slab_id_i
    data_dict_i["ads"] = ads_i
    data_dict_i["active_site_orig"] = active_site_orig_i
    data_dict_i["att_num"] = att_num_i
    # #################################################
    data_dict_i.update(data_out)
    # #################################################
    data_dict_list.append(data_dict_i)
    # #################################################


# #########################################################
df_octa_info = pd.DataFrame(data_dict_list)

col_order_list = ["compenv", "slab_id", "ads", "active_site", "att_num"]
df_octa_info = reorder_df_columns(col_order_list, df_octa_info)

if df_octa_info.shape[0] > 0:
    df_octa_info = df_octa_info.set_index(
        ["compenv", "slab_id", "ads", "active_site", "att_num", "from_oh"],
        drop=True)
# #########################################################
# -
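
# The snippets on this page all revolve around `reorder_df_columns` from
# misc_modules.pandas_methods. A minimal stand-in (a hypothetical sketch,
# not the project's actual implementation), assuming it moves the listed
# columns to the front and keeps the remaining columns in their existing
# order:
def reorder_df_columns(col_order, df):
    front = [col for col in col_order if col in df.columns]
    rest = [col for col in df.columns if col not in front]
    return df[front + rest]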

# ### Combine previous and current `df_octa_info` to create new one

Example #2
new_col_order = [
    "compenv",
    "bulk_id",
    "facet",
    "ads",
    "submitted",
    "att_num",
    "rev_num",
    "num_revs",
    "is_rev_dir",
    "is_attempt_dir",
    "path_job_root",
    "path_job_root_w_att_rev",
]

df = reorder_df_columns(new_col_order, df)


# # Saving data and uploading to Dropbox

# Pickling data ###########################################
directory = os.path.join(os.environ["PROJ_irox_oer"],
                         "dft_workflow/job_processing", "out_data")
if not os.path.exists(directory): os.makedirs(directory)
file_name_i = "df_jobs_base_" + compenv + ".pickle"
Example #3
            data_dict_i["active_site"] = active_site_i
            data_dict_i["compenv"] = compenv_i
            data_dict_i["slab_id"] = slab_id_i
            data_dict_i["ads"] = ads_i
            data_dict_i["active_site_orig"] = active_site_orig_i
            data_dict_i["att_num"] = att_num_i
            data_dict_i["p_band_center"] = p_band_center_i
            # #####################################################
            data_dict_list.append(data_dict_i)
            # #####################################################

# #########################################################
df_i = pd.DataFrame(data_dict_list)
# #########################################################
col_order_list = ["compenv", "slab_id", "ads", "active_site", "att_num"]
df_i = reorder_df_columns(col_order_list, df_i)
# #########################################################
# -

df_i = df_i.set_index(
    ["compenv", "slab_id", "ads", "active_site", "att_num", "from_oh"],
    drop=False)
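
# With drop=False the index keys also remain as ordinary columns, so rows
# can be fetched by MultiIndex lookup while the key values stay available
# for later groupby/merge steps. A self-contained illustration (all key
# values below are made up):
import pandas as pd

toy = pd.DataFrame({
    "compenv": ["wsl"], "slab_id": ["slab_0"], "ads": ["o"],
    "active_site": [62.0], "att_num": [1], "from_oh": [False],
    "p_band_center": [-2.5],
})
toy = toy.set_index(
    ["compenv", "slab_id", "ads", "active_site", "att_num", "from_oh"],
    drop=False)
row = toy.loc[("wsl", "slab_0", "o", 62.0, 1, False)]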

# +
df = df_i

multi_columns_dict = {
    "features": [
        "p_band_center",
    ],
    "data": [
Example #4
    data_dict_i["active_site"] = active_site_i
    data_dict_i["compenv"] = compenv_i
    data_dict_i["slab_id"] = slab_id_i
    data_dict_i["ads"] = ads_i
    data_dict_i["active_site_orig"] = active_site_orig_i
    data_dict_i["att_num"] = att_num_i
    data_dict_i["angle_O_Ir_surf_norm"] = angle_i
    # #####################################################
    data_dict_list.append(data_dict_i)
    # #####################################################

# #########################################################
df_angles = pd.DataFrame(data_dict_list)

col_order_list = ["compenv", "slab_id", "ads", "active_site", "att_num"]
df_angles = reorder_df_columns(col_order_list, df_angles)
# #########################################################
# -

df_angles = df_angles.set_index(
    ["compenv", "slab_id", "ads", "active_site", "att_num", "from_oh"],
    drop=False)

# +
df = df_angles

multi_columns_dict = {
    "features": [
        "angle_O_Ir_surf_norm",
    ],
Example #5
col_order = [
    "compenv",
    "is_submitted",
    "att_num",
    "rev_num",
    "is_rev_dir",
    "is_attempt_dir",
    "path_full",
    "path_rel_to_proj",
    "path_job_root",
    "path_job_root_w_att_rev",
    "path_job_root_w_att",
    "gdrive_path",
]
df = reorder_df_columns(col_order, df)
# -

# ### Preparing rclone commands to run on the cluster

# +
df_i = df[df.is_submitted == False]

if compenv == "wsl":
    bash_comm_files_line_list = []
    grouped = df_i.groupby([
        "compenv",
    ])
    for i_cnt, (name, group) in enumerate(grouped):
        if verbose:
            print(40 * "=")

Example #6
# #########################################################
df_new["e_oh"] = df_new["oh"] - corrections_dict["oh"]
df_new["e_o"] = df_new["o"] - corrections_dict["o"]
df_new["e_ooh"] = df_new["ooh"] - corrections_dict["ooh"]
# -

# # Sort column order

df_new = reorder_df_columns(
    [
        "bulk_system",
        "facet",
        "coverage_type",
        "surface_type",
        "e_oh", "e_o", "e_ooh",
        "oh", "o", "ooh",
        "g_o-g_oh",
        "lim_pot",
        "overpot",
        "lim_step",
        ],
    df_new,
    )
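
# The `g_o-g_oh`, `lim_pot`, and `overpot` columns above correspond to the
# standard 4-step OER analysis. A sketch of how they could be derived from
# the adsorption free energies (assuming g_oh, g_o, g_ooh are in eV and the
# equilibrium potential is 1.23 V):
def oer_overpotential(g_oh, g_o, g_ooh):
    # Step free energies: *OH, *OH -> *O, *O -> *OOH, *OOH -> O2(g).
    # The four steps sum to 4.92 eV (2 H2O -> O2 + 4 H+ + 4 e-).
    dG = [g_oh, g_o - g_oh, g_ooh - g_o, 4.92 - g_ooh]
    lim_pot = max(dG)           # limiting potential (V)
    overpot = lim_pot - 1.23    # overpotential (V)
    return lim_pot, overpot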

# # Round float columns

# +
num_dec = 3

df_new = df_new.round({
    "oh": num_dec, "o": num_dec, "ooh": num_dec,
    })

Example #7

col_order_list = [
    "compenv",
    "slab_id",
    "att_num",
    "ads",
    "active_site",
    "job_id_max",
    "path_short",
    "timed_out",
    "completed",
    "brmix_issue",
    "job_understandable",
    "decision",
    "dft_params_new",
    "path_rel_to_proj",
]
df_jobs_anal = reorder_df_columns(col_order_list, df_jobs_anal)
df_jobs_anal = df_jobs_anal.drop(columns=[
    "path_rel_to_proj",
])

# #########################################################
# Setting index
index_keys = [
    "job_type", "compenv", "slab_id", "ads", "active_site", "att_num",
]
df_jobs_anal = df_jobs_anal.set_index(index_keys)
# -

# ### Writing `df_jobs_anal` to file
Example #8
# #########################################################
# -

if verbose:
    print("df_jobs_data.shape[0]:", df_jobs_data.shape[0])
    print("df_jobs_data_clusters_tmp.shape[0]:",
          df_jobs_data_clusters_tmp.shape[0])
    print("df_jobs_data_from_prev.shape[0]:", df_jobs_data_from_prev.shape[0])

# ### Process dataframe

# +
if df_jobs_data.shape[0] > 0:
    df_jobs_data = reorder_df_columns([
        "bulk_id",
        "slab_id",
        "job_id",
        "facet",
    ], df_jobs_data)

    # Set index to job_id
    df_jobs_data = df_jobs_data.set_index("job_id", drop=False)

df_jobs_data_0 = df_jobs_data

# Combine rows processed here with those already processed in the cluster
df_jobs_data = pd.concat([
    df_jobs_data_clusters_tmp,
    df_jobs_data_0,
    df_jobs_data_from_prev,
])
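
# Note: pd.concat can leave duplicate job_id index entries when a job shows
# up in more than one of the pieces. A hedged dedupe sketch (not part of
# the original notebook) would be:
# df_jobs_data = df_jobs_data[~df_jobs_data.index.duplicated(keep="first")]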
# -

Example #9
        # data_dict_i["from_oh"] = from_oh_i
        # #################################################
        data_dict_i.update(out_dict_i)
        # #################################################
        data_dict_list.append(data_dict_i)
        # #################################################

# #########################################################
df_octa_vol_init = pd.DataFrame(data_dict_list)
# #########################################################

# +
df_octa_vol_init

col_order_list = ["compenv", "slab_id", "ads", "active_site", "att_num"]
df_octa_vol_init = reorder_df_columns(col_order_list, df_octa_vol_init)

df_octa_vol_init = df_octa_vol_init.set_index(
    ["compenv", "slab_id", "ads", "active_site", "att_num"],
    drop=False)

df = df_octa_vol_init

multi_columns_dict = {
    "features": ["active_o_metal_dist", "ir_o_mean", "ir_o_std", "octa_vol", ],
    "data": ["compenv", "slab_id", "ads", "att_num", "active_site", "job_id_max", ],
    }

nested_columns = dict()
# -
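
# `multi_columns_dict` and `nested_columns` presumably feed a helper that
# builds two-level column labels. A minimal sketch of that idea, assuming
# each top-level key ("features", "data") becomes the outer column level:
import pandas as pd

def to_multi_columns(df, multi_columns_dict):
    # Map every listed column to its group; unlisted columns get "".
    col_to_group = {
        col: group
        for group, cols in multi_columns_dict.items()
        for col in cols}
    df = df.copy()
    df.columns = pd.MultiIndex.from_tuples(
        [(col_to_group.get(col, ""), col) for col in df.columns])
    return df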

Example #10

# ### Cleaning up dataframe

# +
cols_order = [
    "slab_id",
    "bulk_id",
    "facet",
    "slab_thick",
    "num_atoms",
    "slab_final",
    "loop_time",
    "iter_time_i",
]
df_slab_3 = reorder_df_columns(cols_order, df_slab_3)

df_slab_final = df_slab_3
df_slab_final = df_slab_final.set_index("slab_id", drop=False)
# -

# ### Save data

# Pickling data ###########################################
import os
import pickle
directory = os.path.join(os.environ["PROJ_irox_oer"],
                         "workflow/creating_slabs", "out_data")
if not os.path.exists(directory): os.makedirs(directory)
with open(os.path.join(directory, "df_slab_final.pickle"), "wb") as fle:
    pickle.dump(df_slab_final, fle)
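
# Reading the pickle back follows the same pattern (standard round-trip,
# shown here as a usage note rather than part of the original notebook):
# with open(os.path.join(directory, "df_slab_final.pickle"), "rb") as fle:
#     df_slab_final = pickle.load(fle)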
Example #11
        df_dict[compenv_i] = df_i

df_comb = pd.concat(list(df_dict.values()), axis=0)
df_comb = df_comb.reset_index(drop=True)

# Change type of `num_revs` to int
# df_comb.num_revs = df_comb.num_revs.astype("int")

df_jobs = df_comb

# +
from misc_modules.pandas_methods import reorder_df_columns

df_jobs = reorder_df_columns(["compenv", "compenv_origin"], df_jobs)

# +
df_slab_ids = get_df_slab_ids()

slab_ids = []
for bulk_id_i, facet_i in zip(df_jobs.bulk_id.tolist(),
                              df_jobs.facet.tolist()):
    slab_id_i = get_slab_id(bulk_id_i, facet_i, df_slab_ids)
    slab_ids.append(slab_id_i)
df_jobs["slab_id"] = slab_ids

#| - Reorder DataFrame columns

df_cols = [
Example #12
# +
from misc_modules.pandas_methods import reorder_df_columns

df_dft = reorder_df_columns(
    [
        "id_unique",
        "stoich",
        "energy_pa",
        "dH",
        "volume",
        "volume_pa",
        "num_atoms",
        "num_atoms_stan",
        "num_atoms_stan_prim",
        "num_atoms_red__stan",
        "num_atoms_red__stan_prim",
        "atoms",
        "atoms_stan",
        "atoms_stan_prim",
    ],
    df_dft,
)

# +
df_dft = df_dft.rename(
    columns={
        "num_atoms": "na",
        "num_atoms_stan_prim": "na_stan_prim",
        })
# -

Example #13

# # Post-process active site dataframe

# +
from misc_modules.pandas_methods import reorder_df_columns

columns_list = [
    'bulk_id',
    'slab_id',
    'facet',
    'num_atoms',
    'num_active_sites',
    'active_sites',
]

df_active_sites = reorder_df_columns(columns_list, df_active_sites)
# -

# # Combining previous `df_active_sites` and the rows processed during current run

# +
# df_active_sites = pd.concat([
#     df_active_sites,
#     df_active_sites_prev,
#     ])
# -

# # Summary of data objects

print("Number of active sites:", df_active_sites.num_active_sites.sum())
print("Number of unique active sites",