df_ads = get_df_ads()
df_ads = df_ads.set_index(["compenv", "slab_id", "active_site", ], drop=False)

df_features = get_df_features()
df_features.index = df_features.index.droplevel(level=5)

df_jobs = get_df_jobs()

df_slab = get_df_slab()

df_jobs_data = get_df_jobs_data()
df_jobs_data["rerun_from_oh"] = df_jobs_data["rerun_from_oh"].fillna(value=False)

df_dft = get_df_dft()

df_job_ids = get_df_job_ids()
df_job_ids = df_job_ids.set_index("job_id")
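# Keep only the first occurrence of each duplicated job_id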
df_job_ids = df_job_ids[~df_job_ids.index.duplicated(keep='first')]
# -

# Names of the feature columns under the top-level "features" column group
feature_cols = df_features["features"].columns.tolist()

# + active=""
#
#
# -

# ### Collecting other relevant data columns from various data objects

# +
# #########################################################
from local_methods import get_num_revs_for_group
# -

from methods import isnotebook
isnotebook_i = isnotebook()
if isnotebook_i:
    from tqdm.notebook import tqdm
    verbose = True
else:
    from tqdm import tqdm
    verbose = False

# # Read Data

# +
df_job_ids = get_df_job_ids()

compenv_local = os.environ["COMPENV"]
# -

# # Parse `df_jobs_base` files

# +
root_dir = os.path.join(os.environ["PROJ_irox_oer"],
                        "dft_workflow/job_processing", "out_data")

compenvs = [
    "nersc",
    "sherlock",
    "slac",
    "wsl",
def process_group_magmom_comp(
    name=None,
    group=None,
    write_atoms_objects=False,
    verbose=False,
):
    """
    """
    #| - process_group_magmom_comp
    # #####################################################
    group_w_o = group

    # #####################################################
    out_dict = dict()
    out_dict["df_magmoms_comp"] = None
    out_dict["good_triplet_comb"] = None
    out_dict["job_ids"] = None
    # out_dict[""] =

    job_ids_list = list(set(group.job_id_max.tolist()))

    #| - Reading data
    # #########################################################
    df_jobs = get_df_jobs()

    # #########################################################
    df_atoms_sorted_ind = get_df_atoms_sorted_ind()
    df_atoms_sorted_ind = df_atoms_sorted_ind.set_index("job_id")

    # #########################################################
    df_job_ids = get_df_job_ids()
    df_job_ids = df_job_ids.set_index("job_id")

    from methods import read_magmom_comp_data

    assert name is not None, "Must pass name to read previous data"

    magmom_comp_data_prev = read_magmom_comp_data(name=name)
    pair_wise_magmom_comp_data_prev = None
    if magmom_comp_data_prev is not None:
        pair_wise_magmom_comp_data_prev = \
            magmom_comp_data_prev["pair_wise_magmom_comp_data"]
    #__|

    if write_atoms_objects:
        #| - Write atoms objects
        df_i = pd.concat(
            [df_job_ids, df_atoms_sorted_ind.loc[group_w_o.job_id_max.tolist()]],
            axis=1,
            join="inner")

        # #########################################################
        df_index_i = group_w_o.index.to_frame()
        compenv_i = df_index_i.compenv.unique()[0]
        slab_id_i = df_index_i.slab_id.unique()[0]

        active_sites = [
            i for i in df_index_i.active_site.unique() if i != "NaN"
        ]
        active_site_i = active_sites[0]

        folder_name = compenv_i + "__" + slab_id_i + "__" + str(
            int(active_site_i))
        # #########################################################

        for job_id_i, row_i in df_i.iterrows():

            job_id = row_i.name
            atoms = row_i.atoms_sorted_good
            ads = row_i.ads

            file_name = ads + "_" + job_id + ".traj"

            print("Is this saving to the right place d9sf")
            root_file_path = os.path.join("__temp__", folder_name)
            print(os.getcwd(), root_file_path)
            if not os.path.exists(root_file_path):
                os.makedirs(root_file_path)

            file_path = os.path.join(root_file_path, file_name)

            atoms.write(file_path)
        #__|

    # #####################################################
    #| - Getting good triplet combinations
    all_triplet_comb = list(
        itertools.combinations(group_w_o.job_id_max.tolist(), 3))

    good_triplet_comb = []
    for tri_i in all_triplet_comb:
        df_jobs_i = df_jobs.loc[list(tri_i)]

        # Triplet must not contain duplicate ads
        # Must strictly be a *O, *OH, and *bare triplet
        ads_freq_dict = CountFrequency(df_jobs_i.ads.tolist())

        any_repeat_ads = any(count > 1 for count in ads_freq_dict.values())

        if not any_repeat_ads:
            good_triplet_comb.append(tri_i)
    #__|

    # #####################################################
    #| - MAIN LOOP
    if verbose:
        print("Number of viable triplet combinations:", len(good_triplet_comb))

    data_dict_list = []
    pair_wise_magmom_comp_data = dict()
    for tri_i in good_triplet_comb:
        #| - Process triplets
        data_dict_i = dict()

        if verbose:
            print("tri_i:", tri_i)

        all_pairs = list(itertools.combinations(tri_i, 2))

        df_jobs_i = df_jobs.loc[list(tri_i)]

        sum_norm_abs_magmom_diff = 0.
        for pair_i in all_pairs:

            # Reuse pair-wise comparison data computed on a previous run
            if (magmom_comp_data_prev is not None) and \
               (pair_i in pair_wise_magmom_comp_data_prev):
                magmom_data_out = pair_wise_magmom_comp_data_prev[pair_i]
            else:
                #| - Process pairs
                row_jobs_0 = df_jobs.loc[pair_i[0]]
                row_jobs_1 = df_jobs.loc[pair_i[1]]

                ads_0 = row_jobs_0.ads
                ads_1 = row_jobs_1.ads

                # #############################################
                # Order the pair: job_id_0 is used as the slab/reference
                # structure, job_id_1 as the adsorbate structure
                if set([ads_0, ads_1]) == set(["o", "oh"]):
                    job_id_0 = df_jobs_i[df_jobs_i.ads == "o"].iloc[0].job_id
                    job_id_1 = df_jobs_i[df_jobs_i.ads == "oh"].iloc[0].job_id
                elif set([ads_0, ads_1]) == set(["o", "bare"]):
                    job_id_0 = df_jobs_i[df_jobs_i.ads == "bare"].iloc[0].job_id
                    job_id_1 = df_jobs_i[df_jobs_i.ads == "o"].iloc[0].job_id
                elif set([ads_0, ads_1]) == set(["oh", "bare"]):
                    job_id_0 = df_jobs_i[df_jobs_i.ads == "bare"].iloc[0].job_id
                    job_id_1 = df_jobs_i[df_jobs_i.ads == "oh"].iloc[0].job_id
                else:
                    raise ValueError(
                        "Unexpected adsorbate pair: " + str((ads_0, ads_1)))

                # #############################################
                row_atoms_i = df_atoms_sorted_ind.loc[job_id_0]
                # #############################################
                atoms_0 = row_atoms_i.atoms_sorted_good
                magmoms_sorted_good_0 = row_atoms_i.magmoms_sorted_good
                was_sorted_0 = row_atoms_i.was_sorted
                # #############################################

                # #############################################
                row_atoms_i = df_atoms_sorted_ind.loc[job_id_1]
                # #############################################
                atoms_1 = row_atoms_i.atoms_sorted_good
                magmoms_sorted_good_1 = row_atoms_i.magmoms_sorted_good
                was_sorted_1 = row_atoms_i.was_sorted
                # #############################################

                # #############################################
                magmom_data_out = get_magmom_diff_data(
                    ads_atoms=atoms_1,
                    slab_atoms=atoms_0,
                    ads_magmoms=magmoms_sorted_good_1,
                    slab_magmoms=magmoms_sorted_good_0,
                )
                #__|

            pair_wise_magmom_comp_data[pair_i] = magmom_data_out

            tot_abs_magmom_diff = magmom_data_out["tot_abs_magmom_diff"]
            norm_abs_magmom_diff = magmom_data_out["norm_abs_magmom_diff"]
            if verbose:
                print("    ",
                      "pair_i: ",
                      pair_i,
                      ": ",
                      np.round(norm_abs_magmom_diff, 3),
                      sep="")

            sum_norm_abs_magmom_diff += norm_abs_magmom_diff

        # #################################################
        data_dict_i["job_ids_tri"] = set(tri_i)
        data_dict_i["sum_norm_abs_magmom_diff"] = sum_norm_abs_magmom_diff
        # #################################################
        data_dict_list.append(data_dict_i)
        # #################################################

        #__|

    #__|

    # #####################################################
    df_magmoms_i = pd.DataFrame(data_dict_list)

    # #####################################################
    out_dict["df_magmoms_comp"] = df_magmoms_i
    out_dict["good_triplet_comb"] = good_triplet_comb
    out_dict["pair_wise_magmom_comp_data"] = pair_wise_magmom_comp_data
    out_dict["job_ids"] = job_ids_list
    # #####################################################

    return out_dict
    #__|
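

# A hypothetical driver sketch (not in the original file): the routine expects
# groups carrying a `job_id_max` column, e.g. from a jobs-analysis dataframe
# (here called `df_jobs_anal`, an assumed input) grouped by
# (compenv, slab_id, active_site).

# +
# for name_i, group_i in df_jobs_anal.groupby(
#         ["compenv", "slab_id", "active_site"]):
#     out_dict_i = process_group_magmom_comp(
#         name=name_i,
#         group=group_i,
#         write_atoms_objects=False,
#         verbose=verbose,
#         )
#     df_magmoms_comp_i = out_dict_i["df_magmoms_comp"]
# -
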
def old_get_ORR_PLT():
    """
    """
    #| - get_ORR_PLT

    # #########################################################
    df_dft = get_df_dft()

    # #########################################################
    df_job_ids = get_df_job_ids()

    # #########################################################
    df_features_targets = get_df_features_targets()

    smart_format_dict = [
        [{"stoich": "AB2"}, {"color2": "black"}],
        [{"stoich": "AB3"}, {"color2": "grey"}],
        ]

    ORR_PLT = ORR_Free_E_Plot(
        free_energy_df=None,
        state_title="ads",
        free_e_title="ads_g",
        smart_format=smart_format_dict,
        color_list=None,
        rxn_type="OER")

    # Estimate *OOH energetics via the OH scaling relation: g_ooh = g_oh + 2.8 eV
    new_col = (df_features_targets["targets"]["g_oh"] + 2.8)

    new_col.name = ("targets", "g_ooh", "", )

    df_features_targets = pd.concat([
        new_col,
        df_features_targets,
        ], axis=1)


    for name_i, row_i in df_features_targets.iterrows():

        #| - Loop through data and add to ORR_PLT

        # #####################################################
        g_o_i = row_i[("targets", "g_o", "", )]
        g_oh_i = row_i[("targets", "g_oh", "", )]
        g_ooh_i = row_i[("targets", "g_ooh", "", )]
        slab_id_i = row_i[("data", "slab_id", "")]
        active_site_i = row_i[("data", "active_site", "")]
        job_id_o_i = row_i[("data", "job_id_o", "")]
        job_id_oh_i = row_i[("data", "job_id_oh", "")]
        # #####################################################

        # #####################################################
        df_job_ids_i = df_job_ids[df_job_ids.slab_id == slab_id_i]

        bulk_ids = df_job_ids_i.bulk_id.unique()

        mess_i = "SIJFIDSIFJIDSJIf"
        assert len(bulk_ids) == 1, mess_i

        bulk_id_i = bulk_ids[0]

        # #########################################################
        row_dft_i = df_dft.loc[bulk_id_i]
        # #########################################################
        stoich_i = row_dft_i.stoich
        # #########################################################


        data_dict_list = [
            {"ads_g": g_o_i, "ads": "o", },
            {"ads_g": g_oh_i, "ads": "oh", },
            {"ads_g": g_ooh_i, "ads": "ooh", },
            {"ads_g": 0., "ads": "bulk", },
            ]
        df_i = pd.DataFrame(data_dict_list)

        df_i["stoich"] = stoich_i


        prop_name_list = [
            "stoich",
            ]

        # #########################################################
        # name_i = "IDSJFISDf"
        name_i = slab_id_i + "__" + str(int(active_site_i))
        ORR_PLT.add_series(
            df_i,
            plot_mode="all",
            overpotential_type="OER",
            property_key_list=prop_name_list,
            add_overpot=False,
            name_i=name_i,
            )
        #__|

    return ORR_PLT
    #__|
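

# For context, a minimal sketch (standard computational OER analysis, not code
# from this project) of how an overpotential follows from the g_o/g_oh/g_ooh
# values fed into each series above. Assumes the conventional four-electron
# mechanism with 4.92 eV for 2 H2O -> O2 and a 1.23 V equilibrium potential.

# +
def oer_overpotential_sketch(g_o, g_oh, g_ooh):
    """Return the OER overpotential (V) from *O, *OH, *OOH free energies (eV)."""
    dg_steps = [
        g_oh,           # * + H2O  -> *OH  + (H+ + e-)
        g_o - g_oh,     # *OH      -> *O   + (H+ + e-)
        g_ooh - g_o,    # *O + H2O -> *OOH + (H+ + e-)
        4.92 - g_ooh,   # *OOH     -> *    + O2 + (H+ + e-)
        ]
    return max(dg_steps) - 1.23

# e.g. oer_overpotential_sketch(g_o=2.5, g_oh=0.8, g_ooh=0.8 + 2.8) -> ~0.47 V
# -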