def re_arrange_learning_results(results_file):
    """Plot stored BLYP energies next to BLYP energies rebuilt from results.

    For every row of the saved results, the PM7 energy of the matching
    database row is pushed through the regression line and the predicted
    deviation is subtracted: ``(slope * pm7 + intercept) - dE_pred``.  Two 2D
    histograms are drawn (stored vs. rebuilt values, both against PM7), the
    mean absolute error between them is printed, and the figure is shown.

    Args:
        results_file: Path passed to ``np.load``.  Column 0 of the loaded
            array is used as a database row index and column 2 as the
            predicted deviation.

    NOTE(review): the histogram and the error metric pair the rebuilt values
    with *all* database rows, so this presumably expects exactly one result
    per database row -- confirm against the code that writes the file.
    """
    # SECURITY: allow_pickle=True executes arbitrary pickled code on load;
    # only use this with result files produced by this project.
    # NOTE(review): an older comment described the layout as
    # [[y_real, y_pred], ...], but columns 0 and 2 are what is read below.
    res = np.load(results_file, allow_pickle=True)
    db = DB("y4_python/11k_molecule_database_eV.db")
    regression = MyRegression(db)
    m_r, c_r = regression.slope, regression.intercept
    all_ = db.get_all()

    def get_Eblyp(row_idx, dE_pred):
        # The second field of a database row is the PM7 energy.
        _, i_pm7, *_ = all_[int(row_idx)]
        return (m_r * i_pm7 + c_r) - dE_pred

    # Order the results by row index so they line up with the database rows.
    res = res[np.argsort(res[:, 0])]

    E_blyp_pred = np.fromiter(
        (get_Eblyp(row[0], row[2]) for row in res),
        dtype=np.float64)

    array_all = np.array(all_)
    fig = plt.figure()
    ax = fig.add_subplot(121)
    ax.hist2d(array_all[:, 1], array_all[:, 2], bins=(200, 100), cmin=1)
    ax.set_title("ax")
    ax2 = fig.add_subplot(122)
    ax2.hist2d(array_all[:, 1], E_blyp_pred, bins=(200, 100), cmin=1)
    ax2.set_title("ax2")
    # This is a mean absolute error, not an RMSE, so name it accordingly.
    mae = mean_absolute_error(array_all[:, 2], E_blyp_pred)
    # NOTE(review): the correlation is computed but its result is discarded;
    # kept so the call's input validation still runs -- decide whether it
    # should be reported.
    pearsonr(array_all[:, 2], E_blyp_pred)
    print(mae)
    plt.show()
def time_RDF_distance():
    """Time ``orbital_distance`` over consecutive pairs of HOMO orbitals.

    The HOMO orbitals are paired as (0, 1), (2, 3), ...; a trailing unpaired
    orbital is ignored.  The whole sweep is repeated 1000 times with
    ``timeit`` and the total elapsed time is printed.
    """
    db = DB("y4_python/11k_molecule_database_eV.db")
    homo_list = db.get_homo_molecular_orbitals()

    def run():
        # Stopping at len - 1 guarantees idx + 1 is always a valid index, so
        # no exception handling is needed to detect the unpaired tail.
        for idx in range(0, len(homo_list) - 1, 2):
            orbital_distance(
                homo_list[idx], {},
                homo_list[idx + 1], {},
                homo_coeff=1.0,
                lumo_coeff=0.0,
                orbital_distance_kwargs={"radial_distribution_coeff": 1.0})

    number = 1000
    t = timeit(run, number=number)
    print(t)
def dE_from_row_idx(row_idx):
    """Return the regression deviation of the database row at ``row_idx``.

    Args:
        row_idx: Index into the sequence returned by ``DB.get_all()``.

    Returns:
        The result of ``MyRegression.distance_from_regress`` for that row's
        PM7 and BLYP values.
    """
    database = DB("y4_python/11k_molecule_database_eV.db")
    fit = MyRegression(database)
    # Rows must have exactly seven fields; only the two energies are used.
    _mol_id, pm7, blyp, _smiles, _fp, _homo, _lumo = database.get_all()[row_idx]
    return fit.distance_from_regress(pm7, blyp)
def D_RDF_from_mol_ids(mol_id1, mol_id2):
    """Return the radial-distribution difference between two HOMO orbitals.

    Args:
        mol_id1: Molecule id of the first database row.
        mol_id2: Molecule id of the second database row.

    Returns:
        The result of ``radial_distribution_difference`` applied to the HOMO
        fields of the two rows.
    """
    from y4_python.python_modules.orbital_similarity import radial_distribution_difference
    database = DB("y4_python/11k_molecule_database_eV.db")
    first_row = database.get_row_from_mol_id(mol_id1)
    second_row = database.get_row_from_mol_id(mol_id2)
    # Each row must have exactly seven fields; the sixth one is the HOMO.
    _, _, _, _, _, first_homo, _ = first_row
    _, _, _, _, _, second_homo, _ = second_row
    return radial_distribution_difference(first_homo, second_homo)
def time_euclidean_distance_learning():
    """Run the learning with the Euclidean distance on the full database.

    Meant for seeing how long it takes and comparing that with
    chemical_distance.
    """
    database = DB("y4_python/11k_molecule_database_eV.db")
    regression = MyRegression(database)
    molecule_ids = database.get_mol_ids()
    pm7_values = database.get_pm7_energies()
    blyp_values = database.get_blyp_energies()

    # One regression deviation per (PM7, BLYP) pair.
    deviations = [
        regression.distance_from_regress(pm7, blyp)
        for pm7, blyp in zip(pm7_values, blyp_values)
    ]

    main_euclidean_distance(
        k_neighbours=5,
        k_folds=10,
        metric_params={},
        mol_list=molecule_ids,
        deviation_list=deviations,
    )
Example #6
0
    def setUp(self) -> None:
        """Prepare an in-memory table and a random sample of SMILES pairs.

        Sets ``self.c`` (cursor on a fresh in-memory SQLite database holding
        an empty ``data`` table), ``self.smiles`` (everything returned by
        ``DB().get_smiles()``), ``self.smiles_sample`` (50 randomly chosen
        entries) and ``self.pairs`` (a one-shot iterator over all 2-element
        combinations of the sample).
        """
        cursor = sqlite3.connect(":memory:").cursor()
        self.c = cursor
        cursor.execute("CREATE TABLE data (fingerprints text)")

        self.smiles: np.ndarray = DB().get_smiles()
        candidates = list(self.smiles)
        self.smiles_sample = sample(candidates, 50)

        # combinations() is lazy: self.pairs can only be iterated once.
        self.pairs = combinations(self.smiles_sample, 2)
def time_structural_distance():
    """Time ``structural_distance`` over consecutive pairs of fingerprints.

    Fingerprints are paired as (0, 1), (2, 3), ...; a trailing unpaired
    fingerprint is ignored.  One sweep is timed with ``timeit``; the number
    of pairs evaluated and the elapsed time are printed.
    """
    db = DB("y4_python/11k_molecule_database_eV.db")
    fingerprint_list = db.get_fingerprints()

    def run():
        ctr = 0
        # Stopping at len - 1 guarantees idx + 1 is always a valid index, so
        # no exception handling is needed to detect the unpaired tail.
        for idx in range(0, len(fingerprint_list) - 1, 2):
            structural_distance(fingerprint_list[idx],
                                fingerprint_list[idx + 1])
            ctr += 1
        print("counter = ", ctr)

    number = 1
    t = timeit(run, number=number)
    print(t)
def time_RDF_and_structural():
    """Time ``chemical_distance`` over consecutive pairs of database rows.

    Row indices are paired as (0, 1), (2, 3), ... and handed to
    ``chemical_distance`` as single-element index arrays together with the
    fingerprint and orbital lists.  A trailing unpaired row is ignored.  The
    sweep is repeated 100 times with ``timeit`` and the total elapsed time is
    printed.
    """
    db = DB("y4_python/11k_molecule_database_eV.db")
    all_ = db.get_all()
    fp_list = db.get_fingerprints()
    homo_list = db.get_homo_molecular_orbitals()
    lumo_list = db.get_lumo_molecular_orbitals()

    def run():
        # Building np.array([idx + 1]) never raises, so the bound itself has
        # to keep idx + 1 inside the data: stop at len - 1.
        for idx in range(0, len(all_) - 1, 2):
            chemical_distance(np.array([idx]),
                              np.array([idx + 1]),
                              homo_coeff=1.0,
                              lumo_coeff=0.0,
                              fingerprint_list=fp_list,
                              homo_orbital_list=homo_list,
                              lumo_orbital_list=lumo_list,
                              c_struct=1.0,
                              c_orbital=1.0,
                              radial_distribution_coeff=1.0)

    number = 100
    t = timeit(run, number=number)
    print(t)
def time_chemical_distance_learning():
    """Run the chemical-distance learning on a reduced data set.

    Used to try different parameters and see what takes the longest; only the
    first ``cutoff`` entries of every list are used so a run does not take
    too long.  The value returned by ``main_chemical_distance`` is printed.
    """
    database = DB("y4_python/11k_molecule_database_eV.db")
    regression = MyRegression(database)

    cutoff = 1000

    molecule_ids = database.get_mol_ids()[:cutoff]
    pm7_values = database.get_pm7_energies()[:cutoff]
    blyp_values = database.get_blyp_energies()[:cutoff]
    # One regression deviation per (PM7, BLYP) pair.
    deviations = [
        regression.distance_from_regress(pm7, blyp)
        for pm7, blyp in zip(pm7_values, blyp_values)
    ]
    fingerprints = database.get_fingerprints()[:cutoff]
    homo_orbitals = database.get_homo_molecular_orbitals()[:cutoff]
    lumo_orbitals = database.get_lumo_molecular_orbitals()[:cutoff]

    metric_params = MetricParams(
        homo_coeff=1.0,
        lumo_coeff=0.0,
        fingerprint_list=fingerprints,
        homo_orbital_list=homo_orbitals,
        lumo_orbital_list=lumo_orbitals,
        c_struct=0.0,
        c_orbital=1.0,
        inertia_coefficient=0.0,
        IPR_coefficient=0.0,
        N_coefficient=0.0,
        O_coefficient=0.0,
        S_coefficient=0.0,
        P_coefficient=0.0,
        radial_distribution_coeff=1.0,
    )

    outcome = main_chemical_distance(
        k_neighbours=5,
        k_folds=10,
        metric_params=metric_params,
        mol_list=molecule_ids,
        deviation_list=deviations,
        save=False,
    )
    print(outcome)
def time_chemical_distance_metric():
    """Print the mean time of one ``chemical_distance`` and one ``euc`` call.

    Each timed call evaluates ``chemical_distance`` for exactly one index
    pair, cycling through all 2-combinations of ``range(it_range)``.  The
    mean time per call is printed, followed by the mean time of a single
    ``euc`` call on two fixed one-element arrays.

    Earlier, the pairs came from a single shared ``combinations`` iterator
    that ran dry after its 45 pairs, so nearly all of the 100,000 timed calls
    did no work and the reported mean was meaningless.  Every timed call now
    performs a real distance evaluation, so this benchmark takes
    correspondingly longer to run.
    """
    from itertools import cycle  # local: only this benchmark needs it

    db = DB("y4_python/11k_molecule_database_eV.db")
    fingerprint_list = db.get_fingerprints()
    homo_list = db.get_homo_molecular_orbitals()
    lumo_list = db.get_lumo_molecular_orbitals()

    i = np.array([0])
    j = np.array([1])

    def run_euc():
        return euc(i, j)

    number = 100_000
    it_range = 10
    # cycle() restarts the pair sequence instead of exhausting it.
    pair_cycle = cycle(combinations(range(it_range), 2))

    def run():
        a, b = next(pair_cycle)
        return chemical_distance(np.array([a]),
                                 np.array([b]),
                                 homo_coeff=1.0,
                                 lumo_coeff=0.0,
                                 fingerprint_list=fingerprint_list,
                                 homo_orbital_list=homo_list,
                                 lumo_orbital_list=lumo_list,
                                 c_orbital=1.0,
                                 c_struct=1.0,
                                 inertia_coefficient=0.0,
                                 IPR_coefficient=0.0,
                                 N_coefficient=0.0,
                                 O_coefficient=0.0,
                                 S_coefficient=0.0,
                                 P_coefficient=0.0,
                                 radial_distribution_coeff=1.0)

    print(
        f"len(it) = {it_range} * ({it_range-1}) / 2 = {it_range*(it_range-1) / 2}"
    )
    print(timeit(run, number=number) / number)

    print(timeit(run_euc, number=number) / number)
def time_euc():
    """Time repeated ``euc`` calls on two fixed three-element arrays.

    One sweep calls ``euc`` once for every second database row (the rows
    only determine how many calls are made).  The sweep is repeated 100 times
    with ``timeit`` and the total elapsed time is printed.
    """
    db = DB("y4_python/11k_molecule_database_eV.db")
    # Only the row count is needed; the fingerprint and orbital lists that
    # used to be fetched here were never read.
    all_ = db.get_all()

    i = np.array([1000, 1000, 1000])
    j = np.array([2000, 2000, 2000])

    def run():
        for _ in range(0, len(all_), 2):
            euc(i, j)

    number = 100
    t = timeit(run, number=number)
    print(t)
                                            mol_list,
                                            deviation_list,
                                            save=False)
    return rmse


if __name__ == "__main__":
    c_struct_lim = (0.0, 10.0)
    c_orbital_lim = (0.0, 10.0)
    bounds = [c_struct_lim] + [
        c_orbital_lim
    ]  # this will be whatever hyperparameters you want to optimize. The length of this sequence is what indicates the number of hyperparameters mini_args = (X, y, condition,fixed_hyperparams) # this will be a tuple with the rest of arguments needed for your function NCPU = 20 # this will be the number of CPUs you want

    num_CPU = 20

    db = DB("y4_python/11k_molecule_database_eV.db")
    my_regression = MyRegression(db)
    mol_list = db.get_mol_ids()
    pm7_energies = db.get_pm7_energies()
    blyp_energies = db.get_blyp_energies()
    deviation_list = (list(
        map(my_regression.distance_from_regress, pm7_energies, blyp_energies)))
    fingerprint_list = db.get_fingerprints()
    homo_molecular_orbital_list = db.get_homo_molecular_orbitals()
    lumo_molecular_orbital_list = db.get_lumo_molecular_orbitals()

    fixed_hyperparams = MetricParams(
        homo_coeff=1.0,
        lumo_coeff=0.0,
        fingerprint_list=fingerprint_list,
        homo_orbital_list=homo_molecular_orbital_list,
def Delta_Ei_from_mol_id(mol_id):
    """Return the regression deviation of the molecule with id ``mol_id``.

    Args:
        mol_id: Molecule id handed to ``DB.get_row_from_mol_id``.

    Returns:
        The result of ``MyRegression.distance_from_regress`` for that row's
        PM7 and BLYP values.
    """
    database = DB("y4_python/11k_molecule_database_eV.db")
    fit = MyRegression(database)
    # Rows must have exactly seven fields; only the two energies are used.
    _mol_id, pm7, blyp, _smiles, _fp, _homo, _lumo = (
        database.get_row_from_mol_id(mol_id))
    return fit.distance_from_regress(pm7, blyp)
def plot_testing_results(relative_file_path, x_max=None):
    """Plot testing-metric results using a regression built from the database.

    Args:
        relative_file_path: Forwarded unchanged to
            ``plot_testing_metric_results``.
        x_max: Forwarded as the ``x_max`` keyword; defaults to ``None``.
    """
    regression = MyRegression(DB("y4_python/11k_molecule_database_eV.db"))
    plot_testing_metric_results(relative_file_path, regression, x_max=x_max)