# Run an Exhaustion search with the given estimator on (X, y) and keep the
# fitted object. refit=True is requested and clf.predict is used below.
# NOTE(review): the meaning of muti_grade / muti_index / must_index is not
# visible here -- confirm against the Exhaustion documentation.
clf = Exhaustion(estimator,
                     n_select=n_select,
                     muti_grade=2,
                     muti_index=[2, X.shape[1]],
                     must_index=None,
                     n_jobs=1,
                     refit=True).fit(X, y)

    # Look up, among the X_frame column names, the entries matching the first
    # element of each of the first 10 clf.score_ex records.
    # presumably score_ex is ordered best-first and element 0 is a feature-index
    # combination -- TODO confirm.
    name_ = name_to_name(X_frame.columns.values,
                         search=[i[0] for i in clf.score_ex[:10]],
                         search_which=0,
                         return_which=(1, ),
                         two_layer=True)
    # clf.scatter as an array; its columns 0 and 1 are plotted below.
    sc = np.array(clf.scatter)

    # Print the second element of every score_ex record, then each looked-up name.
    for i in clf.score_ex[:]:
        print(i[1])
    for i in name_:
        print(i)

    # Predict on the same X that was used for fitting and plot against y.
    t = clf.predict(X)
    p = BasePlot()
    p.scatter(y, t, strx='True $E_{gap}$', stry='Calculated $E_{gap}$')
    plt.show()
    # Second figure: column 0 of sc ('Number') against column 1 ('Score').
    p.scatter(sc[:, 0], sc[:, 1], strx='Number', stry='Score')
    plt.show()

    # Persist the scatter data and the score records; both use the same name,
    # method_name followed by the concatenated items of n_select.
    store.to_csv(sc, method_name + "".join([str(i) for i in n_select]))
    store.to_pkl_pd(clf.score_ex,
                    method_name + "".join([str(i) for i in n_select]))
Exemple #2
0
# Identifiers of the regression methods to compare; the same list later
# supplies the column labels of the result table.
method_all = [
    "SVR-set",
    "GPR-set",
    "RFR-em",
    "AdaBR-em",
    "DTR-em",
    "LASSO-L1",
    "BRR-L1",
]
# One search object per entry of method_all (requested with gd=True); each is
# fitted below and then queried for best_score_ / best_estimator_.
methods = method_pack(method_all=method_all, me="reg", gd=True)

# Metric assigned to every search object and echoed in the log line.
rmse_metric = "neg_root_mean_squared_error"

pre_y = []  # predictions on x, one entry per method (y itself is appended last)
ests = []  # best estimator found for each method

for name, methodi in zip(method_all, methods):
    # Configure the search, then fit it on (x_, y_).
    methodi.cv = 5
    methodi.scoring = rmse_metric
    gd = methodi.fit(X=x_, y=y_)

    score = gd.best_score_
    est = gd.best_estimator_
    print(name, rmse_metric, score)

    # Second figure of merit: mean cross-validated r2 of the best estimator,
    # again on (x_, y_).
    score = cross_val_score(est, X=x_, y=y_, scoring="r2").mean()
    print(name, "r2", score)

    # Predict on x (not x_) and keep both the prediction and the estimator.
    pre_yi = est.predict(x)
    pre_y.append(pre_yi)
    ests.append(est)
    store.to_pkl_pd(est, name)

# Append y as the final entry, transpose so each method becomes a column,
# label the columns and write the table out.
pre_y.append(y)
pre_y = pd.DataFrame(np.array(pre_y).T, columns=method_all + ["realy_y"])
store.to_csv(pre_y, "wrtem_result")
               + [np.array([0, 0, 0, 1, 0, 0, 0])] * 2 + [np.array([0, 0, 0, 0, 0, 0, 0])] + [
                   np.array([0, 3, 0, 0, 0, 0, 0])] \
               + [np.array([0, 0, 0, 0, 0, 0, 0])] + [np.array([0, 3, 0, 0, 0, -1, 0])] + [
                   np.array([1, -3, 0, 0, 0, 0, 0])] \
               + [np.array([1, -1, -2, 0, 0, 0, 0])]

        dims.extend([np.array([0, 1, 0, 0, 0, 0, 0]), np.array([0, 2, 0, 0, 0, 0, 0]), np.array([0, 1, 0, 0, 0, 0, 0]),
                     np.array([0, 2, 0, 0, 0, 0, 0]),
                     np.array([1, -3, 0, 0, 0, 0, 0]), np.array([0, 3, 0, 0, 0, 0, 0]), np.array([0, 0, 0, 0, 0, 0, 0])
                     ])
        return dims


    # Build the name/abbreviation data and the dims list with the two helpers,
    # then persist each through store.to_pkl_pd under a matching name.
    # NOTE(review): the structure returned by get_abbr() is not visible here.
    name_and_abbr = get_abbr()
    dims = get_dim()
    store.to_pkl_pd(dims, "dims")
    store.to_pkl_pd(name_and_abbr, "name_and_abbr")

    # Positional slice: drop the first 5 rows and the first 7 columns.
    # presumably those are non-feature header rows/columns -- TODO confirm.
    element_table = element_table.iloc[5:, 7:]
    feature_select = [
        'lattice constants a',
        'lattice constants b',
        'lattice constants c',
        'radii atomic(empirical)',
        'radii atomic(clementi)',
        'radii ionic(pauling)',
        'radii ionic(shannon)',
        'radii covalent',
        'radii covalent 2',
        'radii metal(waber)',
        'distance valence electron(schubert)',
Exemple #4
0
        estimator_all.append(GridSearchCV(me1, cv=cv1, scoring=scoring1, param_grid=param_grid1, n_jobs=1))

    """union"""
    # Pool the first element of the first 10 records of every list held in
    # index_all, as tuples so they are hashable, then drop duplicates.
    index_all = [tuple(entry[0]) for ranked in index_all for entry in ranked[:10]]
    index_all = list(set(index_all))

    # Get x_name and abbr: look the pooled combinations up among the X_frame
    # column names.
    index_all_name = name_to_name(X_frame.columns.values, search=list(index_all),
                                  search_which=0, return_which=(1,), two_layer=True)

    # Per group: strip every "_<digit>" occurrence, de-duplicate and sort.
    # NOTE(review): r"_\d" removes a single digit only ("_10" would leave "0")
    # and matches anywhere in the name -- confirm this fits the column naming.
    index_all_name = [sorted({re.sub(r"_\d", "", j) for j in i}) for i in index_all_name]
    index_all_abbr = name_to_name(name_init, abbr_init, search=index_all_name, search_which=1, return_which=2,
                                  two_layer=True)

    # Persist the pooled combinations together with their names and abbreviations.
    store.to_pkl_pd(index_all, "index_all")
    store.to_csv(index_all_name, "index_all_name")
    store.to_csv(index_all_abbr, "index_all_abbr")

    # Fit UGS with all candidate estimators on the pooled combinations.
    ugs = UGS(estimator_all, index_all, estimator_n=[2, 3], n_jobs=3)
    ugs.fit(X, y)
    # re = gs.cv_score_all(index_all)
    # The binary distances are computed once and passed to cal_group as
    # pre_binary_distance_all, then handed to cluster_print.
    binary_distance = ugs.cal_binary_distance_all(index_all, estimator_i=3)
    # slice_k  = gs._cv_predict_all(estimator_i=3)
    groups = ugs.cal_group(estimator_i=3, printing=True, print_noise=0.2, pre_binary_distance_all=binary_distance)
    ugs.cluster_print(binary_distance, highlight=[1, 2, 3])

    # groups = ugs.cal_t_group(printing=False, pre_group=None)
    # ss=ugs.select_ugs(alpha=0.01)
    # results = gs.select_gs(alpha=0.01)
    # gs.cal_group(eps=0.10, estimator_i=1, printing=True, pre_binary_distance_all=slice_g, print_noise=0.1,