# NOTE(review): newlines were stripped from this fragment — many logical lines
# are collapsed onto one, so everything after the first '#' below is currently
# dead comment text. Restore line breaks before editing. As written in the
# original, this chunk: (1) finishes populating data_dict_i (the leading
# assignments presumably sit inside a per-row loop whose header is outside this
# view — TODO confirm), (2) appends it to data_dict_list, (3) builds
# df_octa_info = pd.DataFrame(data_dict_list), (4) moves the key columns
# ["compenv", "slab_id", "ads", "active_site", "att_num"] to the front via
# reorder_df_columns, and (5) when the frame is non-empty, sets a MultiIndex of
# (compenv, slab_id, ads, active_site, att_num, from_oh) with drop=True; the
# two '# "active_site_orig", ...' lines are commented-out older index choices.
data_dict_i["slab_id"] = slab_id_i data_dict_i["ads"] = ads_i data_dict_i["active_site_orig"] = active_site_orig_i data_dict_i["att_num"] = att_num_i # ################################################# data_dict_i.update(data_out) # ################################################# data_dict_list.append(data_dict_i) # ################################################# # ######################################################### df_octa_info = pd.DataFrame(data_dict_list) col_order_list = ["compenv", "slab_id", "ads", "active_site", "att_num"] df_octa_info = reorder_df_columns(col_order_list, df_octa_info) if df_octa_info.shape[0] > 0: df_octa_info = df_octa_info.set_index([ "compenv", "slab_id", "ads", # "active_site_orig", "att_num", ], # "active_site_orig", "att_num", "from_oh", ], "active_site", "att_num", "from_oh", ], drop=True) # ######################################################### # - # ### Combine previous and current `df_octa_info` to create new one # + # # TEMP
# NOTE(review): collapsed fragment (newlines lost; text after the first '#' is
# currently inert). Original intent: reorder df's columns so job-identity
# fields ("compenv", "bulk_id", "facet", "ads", "submitted", "att_num",
# "rev_num", "num_revs", "is_rev_dir", "is_attempt_dir", "path_job_root",
# "path_job_root_w_att_rev") lead, then prepare the save location
# $PROJ_irox_oer/dft_workflow/job_processing/out_data (created if missing) and
# the pickle file name "df_jobs_base_" + compenv + ".pickle". The '# + / # -'
# markers are jupytext cell delimiters, not code.
new_col_order = [ "compenv", "bulk_id", "facet", "ads", "submitted", "att_num", "rev_num", "num_revs", "is_rev_dir", "is_attempt_dir", "path_job_root", "path_job_root_w_att_rev", ] df = reorder_df_columns(new_col_order, df) # + # df # + # assert False # - # # Saving data and uploading to Dropbox # Pickling data ########################################### directory = os.path.join(os.environ["PROJ_irox_oer"], "dft_workflow/job_processing", "out_data") if not os.path.exists(directory): os.makedirs(directory) file_name_i = "df_jobs_base_" + compenv + ".pickle"
# NOTE(review): collapsed fragment (newlines lost). Originally: finish filling
# data_dict_i — including the per-site feature "p_band_center" — inside what is
# presumably a per-row loop (header not visible; confirm), append to
# data_dict_list, build df_i, front-load the identity columns, then set a
# (compenv, slab_id, ads, active_site, att_num, from_oh) MultiIndex with
# drop=False (index keys kept as columns). The tail starts a
# multi_columns_dict mapping for a two-level column header:
# "features" -> ["p_band_center"], "data" -> (truncated here).
data_dict_i["active_site"] = active_site_i data_dict_i["compenv"] = compenv_i data_dict_i["slab_id"] = slab_id_i data_dict_i["ads"] = ads_i data_dict_i["active_site_orig"] = active_site_orig_i data_dict_i["att_num"] = att_num_i data_dict_i["p_band_center"] = p_band_center_i # ##################################################### data_dict_list.append(data_dict_i) # ##################################################### # ######################################################### df_i = pd.DataFrame(data_dict_list) # ######################################################### col_order_list = ["compenv", "slab_id", "ads", "active_site", "att_num"] df_i = reorder_df_columns(col_order_list, df_i) # ######################################################### # - df_i = df_i.set_index( ["compenv", "slab_id", "ads", "active_site", "att_num", "from_oh"], drop=False) # + df = df_i multi_columns_dict = { "features": [ "p_band_center", ], "data": [
# NOTE(review): collapsed fragment (newlines lost). Same pattern as the
# p_band_center chunk but for the "angle_O_Ir_surf_norm" feature: populate
# data_dict_i (presumably inside a loop — confirm), append, build df_angles,
# front-load identity columns, set the (compenv, slab_id, ads, active_site,
# att_num, from_oh) MultiIndex with drop=False (one commented-out older index
# without "from_oh" remains), then begin the multi_columns_dict with
# "features" -> ["angle_O_Ir_surf_norm"] (dict truncated at chunk end).
data_dict_i["active_site"] = active_site_i data_dict_i["compenv"] = compenv_i data_dict_i["slab_id"] = slab_id_i data_dict_i["ads"] = ads_i data_dict_i["active_site_orig"] = active_site_orig_i data_dict_i["att_num"] = att_num_i data_dict_i["angle_O_Ir_surf_norm"] = angle_i # ##################################################### data_dict_list.append(data_dict_i) # ##################################################### # ######################################################### df_angles = pd.DataFrame(data_dict_list) col_order_list = ["compenv", "slab_id", "ads", "active_site", "att_num"] df_angles = reorder_df_columns(col_order_list, df_angles) # ######################################################### # - df_angles = df_angles.set_index( # ["compenv", "slab_id", "ads", "active_site", "att_num", ], ["compenv", "slab_id", "ads", "active_site", "att_num", "from_oh"], drop=False) # + df = df_angles multi_columns_dict = { "features": [ "angle_O_Ir_surf_norm", ],
# NOTE(review): collapsed fragment (newlines lost). Originally: reorder df so
# path/bookkeeping columns lead, then prepare rclone commands — filter to
# unsubmitted rows (df.is_submitted == False), and, only when running under
# compenv == "wsl", group the subset by "compenv" and iterate the groups,
# printing a 40-char '=' separator per group when verbose. The loop body is
# truncated at the end of this chunk.
col_order = [ "compenv", "is_submitted", "att_num", "rev_num", "is_rev_dir", "is_attempt_dir", "path_full", "path_rel_to_proj", "path_job_root", "path_job_root_w_att_rev", "path_job_root_w_att", "gdrive_path", ] df = reorder_df_columns(col_order, df) # - # ### Preparing rclone commands to run on the cluster # + df_i = df[df.is_submitted == False] if compenv == "wsl": bash_comm_files_line_list = [] grouped = df_i.groupby([ "compenv", ]) for i_cnt, (name, group) in enumerate(grouped): if verbose: print(40 * "=")
# NOTE(review): collapsed fragment (newlines lost; as one physical line the
# leading '#' comments out everything). Originally: derive corrected adsorption
# energies e_oh / e_o / e_ooh by subtracting the matching corrections_dict
# entries from the raw "oh" / "o" / "ooh" columns, reorder df_new to the fixed
# publication column order (bulk_system … lim_step), then round the float
# columns to num_dec = 3 decimals via DataFrame.round (the round-dict is
# truncated at chunk end).
# ######################################################### df_new["e_oh"] = df_new["oh"] - corrections_dict["oh"] df_new["e_o"] = df_new["o"] - corrections_dict["o"] df_new["e_ooh"] = df_new["ooh"] - corrections_dict["ooh"] # - # # Sort column order df_new = reorder_df_columns( [ "bulk_system", "facet", "coverage_type", "surface_type", "e_oh", "e_o", "e_ooh", "oh", "o", "ooh", "g_o-g_oh", "lim_pot", "overpot", "lim_step", ], df_new, ) # # Round float columns # + num_dec = 3 df_new = df_new.round({ "oh": num_dec, "o": num_dec, "ooh": num_dec,
# NOTE(review): collapsed fragment (newlines lost) that also begins mid-list —
# the opening of the column-order list (and its assignment target) is outside
# this view. Originally: reorder df_jobs_anal's columns, drop the
# "path_rel_to_proj" column after using it in the ordering, then set the index
# to ["job_type", "compenv", "slab_id", "ads", "active_site", "att_num"]
# (the older 5-key index without "job_type" remains as a commented-out line).
"compenv", "slab_id", "att_num", "ads", "active_site", "job_id_max", "path_short", "timed_out", "completed", "brmix_issue", "job_understandable", "decision", "dft_params_new", "path_rel_to_proj", ] df_jobs_anal = reorder_df_columns(col_order_list, df_jobs_anal) df_jobs_anal = df_jobs_anal.drop(columns=[ "path_rel_to_proj", ]) # ######################################################### # Setting index # index_keys = ["compenv", "slab_id", "ads", "active_site", "att_num"] index_keys = [ "job_type", "compenv", "slab_id", "ads", "active_site", "att_num" ] df_jobs_anal = df_jobs_anal.set_index(index_keys) # - # ### Writing `df_jobs_anal` to file
# NOTE(review): collapsed fragment (newlines lost; as one physical line the
# leading '#' comments out the rest). Originally: optionally print the row
# counts of the three frames being merged, then — only when df_jobs_data is
# non-empty — front-load ["bulk_id", "slab_id", "job_id", "facet"], set the
# index to "job_id" with drop=False (job_id kept as a column), and pd.concat
# the cluster-processed rows, the freshly processed rows, and previously
# processed rows into one df_jobs_data. NOTE: concat in this order can produce
# duplicate job_id index entries if the three sources overlap — TODO confirm
# dedup happens downstream.
# ######################################################### # - if verbose: print("df_jobs_data.shape:", df_jobs_data.shape[0]) print("df_jobs_data_clusters_tmp.shape:", df_jobs_data_clusters_tmp.shape[0]) print("df_jobs_data_from_prev.shape:", df_jobs_data_from_prev.shape[0]) # ### Process dataframe # + if df_jobs_data.shape[0] > 0: df_jobs_data = reorder_df_columns([ "bulk_id", "slab_id", "job_id", "facet", ], df_jobs_data) # Set index to job_id df_jobs_data = df_jobs_data.set_index("job_id", drop=False) df_jobs_data_0 = df_jobs_data # Combine rows processed here with those already processed in the cluster df_jobs_data = pd.concat([ df_jobs_data_clusters_tmp, df_jobs_data_0, df_jobs_data_from_prev, ]) # -
# NOTE(review): collapsed fragment (newlines lost; the leading '#' currently
# neutralizes the whole physical line). Originally: merge out_dict_i into
# data_dict_i and append it (presumably inside a loop — header not visible),
# build df_octa_vol_init, front-load the identity columns, set a five-key
# (compenv, slab_id, ads, active_site, att_num) MultiIndex with drop=False —
# note this one has NO "from_oh" key, unlike sibling frames — then define
# multi_columns_dict grouping octahedral-geometry feature columns
# (active_o_metal_dist, ir_o_mean, ir_o_std, octa_vol) vs. bookkeeping "data"
# columns, and an empty nested_columns dict.
# data_dict_i["from_oh"] = from_oh_i # ################################################# data_dict_i.update(out_dict_i) # ################################################# data_dict_list.append(data_dict_i) # ################################################# # ######################################################### df_octa_vol_init = pd.DataFrame(data_dict_list) # ######################################################### # + df_octa_vol_init col_order_list = ["compenv", "slab_id", "ads", "active_site", "att_num"] df_octa_vol_init = reorder_df_columns(col_order_list, df_octa_vol_init) df_octa_vol_init = df_octa_vol_init.set_index( ["compenv", "slab_id", "ads", "active_site", "att_num", ], drop=False) df = df_octa_vol_init multi_columns_dict = { "features": ["active_o_metal_dist", "ir_o_mean", "ir_o_std", "octa_vol", ], "data": ["compenv", "slab_id", "ads", "att_num", "active_site", "job_id_max", ], } nested_columns = dict()
# NOTE(review): collapsed fragment (newlines lost; the leading '# -' jupytext
# marker currently comments out the whole physical line). Originally: reorder
# df_slab_3's columns (slab_id, bulk_id, facet, slab_thick, num_atoms,
# slab_final, loop_time, iter_time_i), alias it to df_slab_final, index by
# "slab_id" with drop=False, then pickle it to
# $PROJ_irox_oer/workflow/creating_slabs/out_data/df_slab_final.pickle,
# creating the directory if needed. The mid-script `import os / import pickle`
# should normally live at the top of the file.
# - # ### Cleaning up dataframe # + cols_order = [ "slab_id", "bulk_id", "facet", "slab_thick", "num_atoms", "slab_final", "loop_time", "iter_time_i", ] df_slab_3 = reorder_df_columns(cols_order, df_slab_3) df_slab_final = df_slab_3 df_slab_final = df_slab_final.set_index("slab_id", drop=False) # - # ### Save data # Pickling data ########################################### import os import pickle directory = os.path.join(os.environ["PROJ_irox_oer"], "workflow/creating_slabs", "out_data") if not os.path.exists(directory): os.makedirs(directory) with open(os.path.join(directory, "df_slab_final.pickle"), "wb") as fle: pickle.dump(df_slab_final, fle)
# NOTE(review): collapsed fragment (newlines lost). Originally: store each
# per-compenv frame in df_dict (the first assignment presumably sits inside a
# loop over compenvs — confirm), concat all values into df_comb with a fresh
# RangeIndex, alias it to df_jobs, move "compenv"/"compenv_origin" to the
# front, then look up a slab_id for every (bulk_id, facet) pair via
# get_slab_ids()/get_slab_id() and attach it as a new "slab_id" column.
# Commented-out older list-based concat and an astype("int") on num_revs
# remain. The trailing df_cols list is truncated at chunk end.
df_dict[compenv_i] = df_i # df_list.append(df_i) # df_comb = pd.concat(df_list, axis=0) df_comb = pd.concat(list(df_dict.values()), axis=0) df_comb = df_comb.reset_index(drop=True) # Change type of `num_revs` to int # df_comb.num_revs = df_comb.num_revs.astype("int") df_jobs = df_comb # + from misc_modules.pandas_methods import reorder_df_columns df_jobs = reorder_df_columns(["compenv", "compenv_origin"], df_jobs) # + df_slab_ids = get_df_slab_ids() slab_ids = [] for bulk_id_i, facet_i in zip(df_jobs.bulk_id.tolist(), df_jobs.facet.tolist()): slab_id_i = get_slab_id(bulk_id_i, facet_i, df_slab_ids) slab_ids.append(slab_id_i) df_jobs["slab_id"] = slab_ids #| - Reorder DataFrame columns from misc_modules.pandas_methods import reorder_df_columns df_cols = [
# NOTE(review): collapsed fragment (newlines lost; leading '#' currently
# comments out the physical line). Originally: reorder df_dft so identity and
# bulk-property columns (id_unique, stoich, energy_pa, dH, volume, volume_pa,
# the num_atoms* variants, and the atoms objects) lead, then begin a
# DataFrame.rename mapping long num_atoms* names to short aliases
# ("num_atoms" -> "na", "num_atoms_stan_prim" -> "na_stan_prim"); the rename
# dict is truncated at chunk end.
# # pd.concat? # + from misc_modules.pandas_methods import reorder_df_columns df_dft = reorder_df_columns( [ "id_unique", "stoich", "energy_pa", "dH", "volume", "volume_pa", "num_atoms", "num_atoms_stan", "num_atoms_stan_prim", "num_atoms_red__stan", "num_atoms_red__stan_prim", "atoms", "atoms_stan", "atoms_stan_prim", ], df_dft, ) # + df_dft = df_dft.rename( columns={ "num_atoms": "na", "num_atoms_stan_prim": "na_stan_prim",
# NOTE(review): collapsed fragment (newlines lost; leading '#' currently
# comments out the physical line). Originally: reorder df_active_sites so
# bulk_id/slab_id/facet/num_atoms/num_active_sites/active_sites lead; a
# commented-out pd.concat with df_active_sites_prev shows that merging with
# previously processed rows was disabled — TODO confirm whether that is
# intentional. Ends with summary prints (total active sites, and a truncated
# unique-count print).
# # Post-process active site dataframe # + from misc_modules.pandas_methods import reorder_df_columns columns_list = [ 'bulk_id', 'slab_id', 'facet', 'num_atoms', 'num_active_sites', 'active_sites', ] df_active_sites = reorder_df_columns(columns_list, df_active_sites) # - # # Combining previous `df_active_sites` and the rows processed during current run # + # df_active_sites = df_active_sites = pd.concat([ # df_active_sites, # df_active_sites_prev, # ]) # - # # Summary of data objects print("Number of active sites:", df_active_sites.num_active_sites.sum()) print("Number of unique active sites",