예제 #1
0
    def remove_by_y(self, y_):
        """Keep one member of every group in ``self.list_count``, guided by ``y_``.

        The scores come from ``self.cov_y(self.data, y_)`` passed through
        ``self.feature_fold``. ``name_to_name`` is then queried with the groups
        of ``self.list_count``, and from each group the member sitting at the
        arg-max position of its returned scores is kept.

        Args:
            y_: target values forwarded unchanged to ``self.cov_y``.

        Returns:
            list: the kept members, de-duplicated and sorted in ascending order.
        """
        folded = self.cov_y(self.data, y_)
        # NOTE(review): ``self`` is passed explicitly on an already bound call;
        # confirm this matches the signature of ``feature_fold``.
        folded = self.feature_fold(self, folded)
        # Fetched after the fold step so that any update the fold makes to the
        # attribute is the one used here.
        groups = self.list_count
        group_scores = name_to_name(
            folded,
            search=groups,
            search_which=0,
            return_which=(1, ),
            two_layer=True,
        )

        picked = set()
        for scores, members in zip(group_scores, groups):
            # Position of the highest score selects the member to keep.
            picked.add(members[np.argmax(scores)])
        return sorted(picked)
예제 #2
0
                             scoring=scoring1,
                             param_grid=param_grid1,
                             n_jobs=1)
    # Candidate values handed to Exhaustion as ``n_select``; the commented
    # line below is an alternative setting that is currently disabled.
    # n_select = [1,]
    n_select = (2, 3)
    # Build the Exhaustion search around ``estimator`` and fit it on (X, y).
    # ``muti_index`` is given the range [2, number of columns of X].
    clf = Exhaustion(estimator,
                     n_select=n_select,
                     muti_grade=2,
                     muti_index=[2, X.shape[1]],
                     must_index=None,
                     n_jobs=1,
                     refit=True).fit(X, y)

    # Look up the column names of X_frame using the first element of each of
    # the first ten ``clf.score_ex`` entries as the search key.
    # NOTE(review): presumably score_ex is ordered best-first - confirm.
    name_ = name_to_name(X_frame.columns.values,
                         search=[i[0] for i in clf.score_ex[:10]],
                         search_which=0,
                         return_which=(1, ),
                         two_layer=True)
    # ``clf.scatter`` as an array so its columns can be sliced for plotting.
    sc = np.array(clf.scatter)

    # Print the second element of every score_ex entry, then every looked-up
    # name group.
    for i in clf.score_ex[:]:
        print(i[1])
    for i in name_:
        print(i)

    # Predict on the data the search was fitted on and plot y against the
    # predictions.
    t = clf.predict(X)
    p = BasePlot()
    p.scatter(y, t, strx='True $E_{gap}$', stry='Calculated $E_{gap}$')
    plt.show()
    # Plot the first column of ``sc`` against its second column.
    p.scatter(sc[:, 0], sc[:, 1], strx='Number', stry='Score')
    plt.show()
예제 #3
0
    # Keys used to look up each regressor configuration in dict_method_reg().
    method_name = [
        'GPR-set', 'SVR-set', 'KRR-set', 'KNR-set',
        'GBR-em', 'AdaBR-em', 'RFR-em', "DTR-em",
    ]

    # Stored results for three of the methods.
    index_all = [
        data.pickle_pd.GPR_set23,
        data.pickle_pd.SVR_set23,
        data.pickle_pd.KRR_set23,
    ]

    # One GridSearchCV per method name. The loop targets are left as plain
    # names because they remain bound after the loop finishes.
    estimator_all = []
    for i in method_name:
        me1, cv1, scoring1, param_grid1 = dict_method_reg()[i]
        estimator_all.append(
            GridSearchCV(
                me1,
                cv=cv1,
                scoring=scoring1,
                param_grid=param_grid1,
                n_jobs=1,
            )
        )

    # Union: take the leading item of the first five entries of every stored
    # result, as tuples, and drop duplicates.
    index_slice = list(set(
        tuple(entry[0]) for ranked in index_all for entry in ranked[:5]
    ))

    # Get x_name and abbr.
    index_all_name = name_to_name(
        X_frame.columns.values,
        search=list(index_slice),
        search_which=0,
        return_which=(1,),
        two_layer=True,
    )

    # Remove every "_<digit>" occurrence from each name, then keep the unique
    # names of each group in sorted order.
    index_all_name = [
        sorted({re.sub(r"_\d", "", column) for column in group})
        for group in index_all_name
    ]
    index_all_abbr = name_to_name(
        name_init,
        abbr_init,
        search=index_all_name,
        search_which=1,
        return_which=2,
        two_layer=True,
    )

    # Accumulators filled by the code that follows.
    parto = []
    table = []

    for i in range(2):
        print(i)
        X = X_frame.values
        y = y_frame.values

        scal = preprocessing.MinMaxScaler()
예제 #4
0
# Script entry point: loads the band-gap data, builds the list of selected
# descriptor names, splits the rows by group_number and runs the correlation
# step.
# NOTE(review): ``np``, ``y_frame``, ``X``, ``X_frame`` and ``name_and_abbr``
# are used below but are not bound anywhere in the visible lines - confirm
# they are provided elsewhere in the file.
if __name__ == '__main__':
    import pandas as pd

    # Hard-coded local paths for the result store and the input data folder.
    store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\4.symbol')
    data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp')
    data_import = data.csv().all_import
    name_init, abbr_init = data.pickle_pd().name_and_abbr

    # Descriptor names to look up.
    # NOTE(review): 'destiny' is presumably a stored spelling of "density";
    # it is a lookup key, so it must match the data exactly.
    select = [
        'destiny', 'valence electron number',
        'distance core electron(schubert)'
    ]

    # Resolve the selected names through name_to_name (single-layer search).
    X_frame_abbr = name_to_name(name_init,
                                abbr_init,
                                search=select,
                                search_which=1,
                                return_which=2,
                                two_layer=False)

    # Keep the first name as is and expand every other name into "<name>_0"
    # and "<name>_1".
    select = ['destiny'
              ] + [j + "_%i" % i for j in select[1:] for i in range(2)]

    # Same expansion for the abbreviations, with a fixed LaTeX label first.
    select_abbr = ['$\\rho_c$'] + [
        j + "_%i" % i for j in X_frame_abbr[1:] for i in range(2)
    ]

    # Rows whose group_number equals 216 / 225, and both subsets concatenated.
    data216_import = data_import.iloc[np.where(
        data_import['group_number'] == 216)[0]]
    data225_import = data_import.iloc[np.where(
        data_import['group_number'] == 225)[0]]
    data216_225_import = pd.concat((data216_import, data225_import))
    y = y_frame.values
    #
    # """calculate corr"""
    # NOTE(review): len(X) is passed as the upper bound of muti_index -
    # confirm that this is the intended extent.
    corr = Corr(threshold=0.90, muti_grade=2, muti_index=[3, len(X)])
    corr.fit(X_frame)
    cof_list = corr.count_cof()
    #
    """get x_name and abbr"""

    # Transform the column names with the fitted Corr object and remove every
    # "_0" substring from the results.
    X_frame_name = corr.transform(X_frame.columns.values)
    X_frame_name = [i.replace("_0", "") for i in X_frame_name]

    # Look the names up again, this time against the columns and the first
    # row of ``name_and_abbr``.
    X_frame_abbr = name_to_name(name_and_abbr.columns.values,
                                list(name_and_abbr.iloc[0, :]),
                                search=X_frame_name,
                                search_which=1,
                                return_which=[
                                    2,
                                ],
                                two_layer=False)
    """rename"""
    # Disabled heatmap variant, kept as it was:
    # cov = pd.DataFrame(corr.cov_shrink)
    # cov = cov.set_axis(X_frame_abbr, axis='index', inplace=False)
    # cov = cov.set_axis(X_frame_abbr, axis='columns', inplace=False)
    #
    # fig = plt.figure()
    # fig.add_subplot(111)
    # sns.heatmap(cov, vmin=-1, vmax=1, cmap="bwr", linewidths=0.3, xticklabels=True, yticklabels=True, square=True,
    #             annot=True, annot_kws={'size': 3})
    # plt.show()
    #
    corr_plot(corr.cov_shrink,
예제 #6
0
    # Features and target taken from the data225_import rows: every column
    # except 'exp_gap' and 'group_number' is a feature, 'exp_gap' is the target.
    X_frame = data225_import.drop(['exp_gap', 'group_number'], axis=1)
    y_frame = data225_import['exp_gap']
    X = X_frame.values
    y = y_frame.values

    # Calculate corr.
    # NOTE(review): len(X) is the number of rows of X; confirm that this, and
    # not X.shape[1], is the intended upper bound for muti_index.
    corr = Corr(threshold=0.90, muti_grade=2, muti_index=[2, len(X)])
    corr.fit(X_frame)
    cof_list = corr.count_cof()

    # Get x_name and abbr: transform the column names with the fitted Corr
    # object, remove every "_0" substring, then look up the abbreviations.
    X_frame_name = corr.transform(X_frame.columns.values)
    X_frame_name = [i.replace("_0", "") for i in X_frame_name]

    X_frame_abbr = name_to_name(name_init, abbr_init, search=X_frame_name, search_which=1, return_which=2,
                                two_layer=False)

    # Rename: label both axes of the matrix with the abbreviations.
    # DataFrame.set_axis(..., inplace=False) is not used because the
    # ``inplace`` keyword was deprecated in pandas 1.5 and removed in 2.0.
    # Assigning the labels on a private copy works on every pandas version and
    # leaves ``corr.cov_shrink`` itself untouched, as the old calls did.
    cov = pd.DataFrame(corr.cov_shrink).copy()
    cov.index = X_frame_abbr
    cov.columns = X_frame_abbr

    # Annotated heatmap of the labelled matrix on a fixed [-1, 1] colour scale.
    fig = plt.figure()
    fig.add_subplot(111)
    sns.heatmap(cov, vmin=-1, vmax=1, cmap="bwr", linewidths=0.3, xticklabels=True, yticklabels=True, square=True,
                annot=True, annot_kws={'size': 3})
    plt.show()
    # Project-specific plot of the same matrix (unlabelled input plus labels).
    corr_plot(corr.cov_shrink, X_frame_abbr, left_down="fill", right_top="pie", threshold_right=0, front_raito=0.5)

    list_name, list_abbr = name_to_name(X_frame_name, X_frame_abbr, search=corr.list_count, search_which=0,
                                        return_which=(1, 2),