Example #1
    def to_csv(self, data_all):
        """Store the collected run data to a CSV file."""
        if self.store:
            if isinstance(self.store, str):
                path = self.store
            else:
                path = os.getcwd()
            file_new_name = "_".join((str(self.pop), str(self.gen),
                                      str(self.mutate_prob), str(self.mate_prob),
                                      str(time.time())))
            try:
                st = Store(path)
                st.to_csv(data_all, file_new_name, transposition=True)
                print("store data to ", path, file_new_name)
            except (IOError, PermissionError):
                # Fall back to the current working directory if the target
                # path cannot be written to.
                st = Store(os.getcwd())
                st.to_csv(data_all, file_new_name, transposition=True)
                print("store data to ", os.getcwd(), file_new_name)
Example #2
def eaSimple(population,
             toolbox,
             cxpb,
             mutpb,
             ngen,
             stats=None,
             halloffame=None,
             verbose=__debug__,
             pset=None,
             store=True):
    """

    Parameters
    ----------
    population
    toolbox
    cxpb
    mutpb
    ngen
    stats
    halloffame
    verbose
    pset
    store
    Returns
    -------

    """
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = ["gen", "pop"] + (stats.fields if stats else [])
    data_all = {}
    random.setstate(rst)

    for gen in range(1, ngen + 1):
        "评价"
        rst = random.getstate()
        """score"""
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        # Evaluate only the not-yet-valid individuals, so the zip below
        # pairs each individual with its own result.
        fitnesses = toolbox.parallel(iterable=invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.y_dim = fit[2]
            ind.withdim = fit[3]
        random.setstate(rst)

        rst = random.getstate()
        """elite"""
        add_ind = []
        add_ind1 = toolbox.select_kbest_target_dim(population,
                                                   K_best=0.05 * len_pop)
        add_ind += add_ind1
        elite_size = len(add_ind)
        random.setstate(rst)

        rst = random.getstate()
        """score"""

        random.setstate(rst)

        rst = random.getstate()
        """record"""
        if halloffame is not None:
            halloffame.update(add_ind1)
            if (len(halloffame.items) > 0
                    and halloffame.items[-1].fitness.values[0] >= 0.9999):
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        random.setstate(rst)

        rst = random.getstate()
        """Dynamic output"""

        record = stats.compile_(population) if stats else {}
        logbook.record(gen=gen, pop=len(population), **record)

        if verbose:
            print(logbook.stream)
        random.setstate(rst)
        """crossover, mutate"""
        offspring = toolbox.select_gs(population, len_pop - elite_size)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        rst = random.getstate()
        """re-run"""
        offspring.extend(add_ind)
        population[:] = offspring
        random.setstate(rst)

    if store:
        # Respect the `store` flag instead of shadowing it with a Store instance.
        st = Store()
        st.to_csv(data_all)
    return population, logbook
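One pattern worth noting in this loop: every phase is bracketed by random.getstate() / random.setstate(), so optional phases (storage, recording, printing) cannot perturb the stream of random draws consumed by selection and variation. A self-contained sketch of the idea, using only the standard library:

import random

random.seed(0)


def side_task():
    # An optional step that happens to consume random numbers.
    random.random()


# Bracketing the side task keeps the main random sequence identical
# whether or not the task runs.
state = random.getstate()
side_task()
random.setstate(state)

print(random.random())  # same value as if side_task() had never run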
Example #3
    """union"""
    index_all = [tuple(index[0]) for group in index_all for index in group[:10]]
    index_all = list(set(index_all))

    """get x_name and abbr"""
    index_all_name = name_to_name(X_frame.columns.values, search=list(index_all),
                                  search_which=0, return_which=(1,), two_layer=True)

    index_all_name = [list(set([re.sub(r"_\d", "", j) for j in i])) for i in index_all_name]
    for i in index_all_name:
        i.sort()
    index_all_abbr = name_to_name(name_init, abbr_init, search=index_all_name, search_which=1, return_which=2,
                                  two_layer=True)

    store.to_pkl_pd(index_all, "index_all")
    store.to_csv(index_all_name, "index_all_name")
    store.to_csv(index_all_abbr, "index_all_abbr")

    ugs = UGS(estimator_all, index_all, estimator_n=[2, 3], n_jobs=3)
    ugs.fit(X, y)
    # re = gs.cv_score_all(index_all)
    binary_distance = ugs.cal_binary_distance_all(index_all, estimator_i=3)
    # slice_k  = gs._cv_predict_all(estimator_i=3)
    groups = ugs.cal_group(estimator_i=3, printing=True, print_noise=0.2, pre_binary_distance_all=binary_distance)
    ugs.cluster_print(binary_distance, highlight=[1, 2, 3])

    # groups = ugs.cal_t_group(printing=False, pre_group=None)
    # ss=ugs.select_ugs(alpha=0.01)
    # results = gs.select_gs(alpha=0.01)
    # gs.cal_group(eps=0.10, estimator_i=1, printing=True, pre_binary_distance_all=slice_g, print_noise=0.1,
    #              node_name=index_all_abbr)
Example #4
scatter(ytest, y_pre_test, strx='y_true($10^4$T)', stry='y_predict($10^4$T)')
scatter(ytrain, y_pre_train, strx='y_true($10^4$T)', stry='y_predict($10^4$T)')


def scatter2(x, y_true, y_predict, strx='y_true', stry1='y_true(GWh)', stry2='y_predict', stry="y"):
    fig = plt.figure()
    ax = fig.add_subplot(111)
    l1 = ax.scatter(x, y_true, marker='o', s=50, alpha=0.7, c='orange', linewidths=None, edgecolors='blue')
    ax.plot(x, y_true, '-', ms=5, lw=2, alpha=0.7, color='black')
    l2 = ax.scatter(x, y_predict, marker='^', s=50, alpha=0.7, c='green', linewidths=None, edgecolors='blue')
    ax.plot(x, y_predict, '-', ms=5, lw=2, alpha=0.7, color='green')
    # ax.plot([min(x), max(x)], [min(x), max(x)], '--', ms=5, lw=2, alpha=0.7, color='black')
    plt.xlabel(strx)
    plt.legend((l1, l2),
               (stry1, stry2),
               loc='upper left')
    plt.ylabel(stry)
    plt.show()


a = np.arange(2000, 2020)

scatter2(a, y[::-1], y_[::-1], strx='year', stry="y($10^4$T)", stry1='y_true($10^4$T)', stry2='y_predict($10^4$T)')

# # export
print(x_frame.iloc[:, :].columns.values[ba.support_])
store.to_pkl_sk(ba.estimator_, "model")
all_import["y_predict"] = y_
store.to_csv(all_import, "predict")
Example #5
    dim_init = data.dims

    index_all_name = name_to_name(X_frame.columns.values,
                                  search=list(index_slice),
                                  search_which=0,
                                  return_which=(1, ),
                                  two_layer=True)
    index_all_name = [
        list([re.sub(r"_\d", "", j) for j in i]) for i in index_all_name
    ]

    index_all_dim = name_to_name(name_init,
                                 dim_init,
                                 search=index_all_name,
                                 search_which=1,
                                 return_which=2,
                                 two_layer=True)

    dim_target = [
        dimension_check(list(dim), np.array([1, 2, -2, 0, 0, 0, 0]))
        for dim in index_all_dim
    ]
    dim_1 = [dimension_check(dim) for dim in index_all_dim]

    result['dim1'] = dim_1
    result['dim_target'] = dim_target

    result = result.sort_values(by="all_mean", ascending=False)
    store.to_csv(result, "result")
Example #6
                                 batch_size=40,
                                 re_hall=3,
                                 n_jobs=12,
                                 mate_prob=0.9,
                                 max_value=5,
                                 mutate_prob=0.8,
                                 tq=False,
                                 dim_type="coef",
                                 re_Tree=0,
                                 store=False,
                                 random_state=12,
                                 verbose=True,
                                 stats={
                                     "fitness_dim_max": ["max"],
                                     "dim_is_target": ["sum"]
                                 },
                                 add_coef=True,
                                 inner_add=False,
                                 cal_dim=True,
                                 vector_add=True,
                                 personal_map=False)
            # b = time.time()
            exps = bl.run()
            print([i.coef_expr for i in exps])
            score = exps.keys[0].values[0]
            name = group_str(exps[0], pset0, feature_name=True)
            dicts["s%s" % i] = [score, name]
            print(i)

        store.to_csv(dicts, model="a+")
Example #7
    ]

    select = ['volume'] + [j + "_%i" % i for j in select[1:] for i in range(2)]

    X_frame = data225_import[select]
    y_frame = data225_import['exp_gap']

    X = X_frame.values
    y = y_frame.values

    name, rep_name = getName(X_frame)
    x0, x1, x2, x3, x4, x5, x6 = rep_name
    expr01 = sympy.log(1 / (x1 + x2) * x0 / (x5 + x6) * x4 / x3)

    results = calculateExpr(expr01,
                            pset=None,
                            x=X,
                            y=y,
                            score_method=r2_score,
                            add_coeff=True,
                            del_no_important=False,
                            filter_warning=True,
                            terminals=rep_name,
                            inter_add=True,
                            iner_add=False,
                            random_add=False)
    print(select)
    print(results)

    store.to_csv(data216_225_221import, "plot221225216")
Example #8
param_grid3 = [{'n_estimators': [100, 200], 'learning_rate': [0.1, 0.05]}]

# 2 model
ref = RFECV(me2, cv=3)
x_ = ref.fit_transform(x, y)
gd = GridSearchCV(me2, cv=3, param_grid=param_grid2, scoring="r2", n_jobs=1)
gd.fit(x_, y)
score = gd.best_score_

# 1,3 model
# gd = GridSearchCV(me1, cv=3, param_grid=param_grid1, scoring="r2", n_jobs=1)
# gd.fit(x,y)
# es = gd.best_estimator_
# sf = SelectFromModel(es, threshold=None, prefit=False,
#                  norm_order=1, max_features=None)
# sf.fit(x,y)
# feature = sf.get_support()
#
# gd.fit(x[:,feature],y)
# score = gd.best_score_

# other models
# exhaustive search, etc.

# export
# pd.to_pickle(gd,r'C:\Users\Administrator\Desktop\skk\gd_model')
# pd.read_pickle(r'C:\Users\Administrator\Desktop\skk\gd_model')
store.to_pkl_sk(gd)
store.to_csv(x)
store.to_txt(score)
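A caveat on the "2 model" block above: fitting RFECV on all of x and then grid-searching on the transformed features lets the feature-selection step see data from every CV test fold. Wrapping both steps in one Pipeline keeps selection inside each fold. A hedged sketch with placeholder data and estimators (not the example's me2/param_grid2):

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFECV
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

x, y = make_regression(n_samples=100, n_features=10, noise=0.1, random_state=0)

# Feature selection and model fitting refit together inside each CV fold.
pipe = Pipeline([
    ("select", RFECV(RandomForestRegressor(random_state=0), cv=3)),
    ("model", RandomForestRegressor(random_state=0)),
])
param_grid = {"model__n_estimators": [50, 100]}

gd = GridSearchCV(pipe, cv=3, param_grid=param_grid, scoring="r2", n_jobs=1)
gd.fit(x, y)
print(gd.best_score_)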
Example #9
    x_frame = (all_import_title['electron number_0'] + all_import_title['electron number_1']) / all_import_title[
        'cell volume']

    all_import_title.insert(10, "electron density", x_frame)

    # store.to_csv(all_import_title, "all_import_title", reverse=False)

    all_import = all_import_title.drop(
        ['name_number', "cell density", "name", "structure", "structure_type", "space_group",
         "reference", 'material_id',
         'composition', "com_0", "com_1"], axis=1)

    all_import = all_import.iloc[np.where(all_import['group_number'] == 225)[0]]
    all_import = all_import.drop(['group_number'], axis=1)

    store.to_csv(all_import, "all_import", transposition=False)


    def get_abbr():
        name = ["electron density", "cell density", 'cell volume', "component"]
        abbrTex = [r"$\rho_e$", r"$\rho_c$", "$V_c$", "$com$"]
        abbr = [r"rho_e", r"rho_c", "V_c", "com"]

        for i, j, k in zip(name, abbrTex, abbr):
            name_and_abbr.insert(0, i, [j, k])


    get_abbr()

    store.to_csv(name_and_abbr, "name_and_abbr", transposition=False)
Example #10
def eaSimple(population,
             toolbox,
             cxpb,
             mutpb,
             ngen,
             stats=None,
             halloffame=None,
             verbose=__debug__,
             pset=None,
             store=True):
    """

    Parameters
    ----------
    population
    toolbox
    cxpb
    mutpb
    ngen
    stats
    halloffame
    verbose
    pset
    store
    Returns
    -------

    """
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = ["gen", "pop"] + (stats.fields if stats else [])
    data_all = {}
    random.setstate(rst)

    for gen in range(1, ngen + 1):
        "评价"
        rst = random.getstate()
        """score"""
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        # Evaluate only the not-yet-valid individuals, so the zip below
        # pairs each individual with its own result.
        fitnesses = toolbox.parallel(iterable=invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.dim = fit[2]
            ind.withdim = fit[3]
        random.setstate(rst)

        rst = random.getstate()
        """elite"""
        add_ind = []
        add_ind1 = toolbox.select_kbest_target_dim(population,
                                                   K_best=0.01 * len_pop)
        add_ind2 = toolbox.select_kbest_dimless(population,
                                                K_best=0.01 * len_pop)
        add_ind3 = toolbox.select_kbest(population, K_best=5)
        add_ind += add_ind1
        add_ind += add_ind2
        add_ind += add_ind3
        elite_size = len(add_ind)
        random.setstate(rst)

        rst = random.getstate()
        """score"""
        if store:
            subp = functools.partial(sub,
                                     subed=pset.rep_name_list,
                                     subs=pset.real_name_list)
            data = {
                "gen{}_pop{}".format(gen, n): {
                    "gen": gen,
                    "pop": n,
                    "score": i.fitness.values[0],
                    "expr": str(subp(i.expr)),
                }
                for n, i in enumerate(population) if i is not None
            }
            data_all.update(data)
        random.setstate(rst)

        rst = random.getstate()
        """record"""
        if halloffame is not None:
            halloffame.update(add_ind3)
            if (len(halloffame.items) > 0
                    and halloffame.items[-1].fitness.values[0] >= 0.95):
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        random.setstate(rst)

        rst = random.getstate()
        """Dynamic output"""

        record = stats.compile(population) if stats else {}
        logbook.record(gen=gen, pop=len(population), **record)

        if verbose:
            print(logbook.stream)
        random.setstate(rst)
        """crossover, mutate"""
        offspring = toolbox.select_gs(population, len_pop - elite_size)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        rst = random.getstate()
        """re-run"""
        offspring.extend(add_ind)
        population[:] = offspring
        random.setstate(rst)

    if store:
        # Respect the `store` flag instead of shadowing it with a Store instance.
        st = Store()
        st.to_csv(data_all)
    return population, logbook
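The storage step above relies on a sub helper (partially applied with functools.partial) that maps the primitive set's placeholder terminal names back to real feature names before expressions are written out. A hypothetical stand-in consistent with that call signature:

import functools
import re


def sub(expr, subed=(), subs=()):
    # Hypothetical stand-in: replace each placeholder name in `subed`
    # with the corresponding real name in `subs` in the expression string.
    s = str(expr)
    for old, new in zip(subed, subs):
        s = re.sub(r"\b%s\b" % re.escape(str(old)), str(new), s)
    return s


subp = functools.partial(sub, subed=["x0", "x1"], subs=["volume", "density"])
print(subp("log(x0/x1)"))  # -> log(volume/density)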
Example #11
    clf = Exhaustion(estimator,
                     n_select=n_select,
                     muti_grade=2,
                     muti_index=[2, X.shape[1]],
                     must_index=None,
                     n_jobs=1,
                     refit=True).fit(X, y)

    name_ = name_to_name(X_frame.columns.values,
                         search=[i[0] for i in clf.score_ex[:10]],
                         search_which=0,
                         return_which=(1, ),
                         two_layer=True)
    sc = np.array(clf.scatter)

    for i in clf.score_ex:
        print(i[1])
    for i in name_:
        print(i)

    t = clf.predict(X)
    p = BasePlot()
    p.scatter(y, t, strx='True $E_{gap}$', stry='Calculated $E_{gap}$')
    plt.show()
    p.scatter(sc[:, 0], sc[:, 1], strx='Number', stry='Score')
    plt.show()

    store.to_csv(sc, method_name + "".join([str(i) for i in n_select]))
    store.to_pkl_pd(clf.score_ex,
                    method_name + "".join([str(i) for i in n_select]))
Example #12
com_data = pd.read_excel(r'C:\Users\Administrator\Desktop\band_gap_exp_last\init_band_data.xlsx',
                         sheet_name='binary_4_structure', header=0, skiprows=None, index_col=0, names=None)
composition = pd.Series(map(eval, com_data['composition']))
composition_mp = pd.Series(map(mg.Composition, composition))
"""for element site"""
com_mp = pd.Series([i.to_reduced_dict for i in composition_mp])
# com_mp = composition_mp
all_import = data.csv.all_import
id_structures = data.id_structures
structures = id_structures
vor_area = count_voronoinn(structures, mess="area")
vor_dis = count_voronoinn(structures, mess="face_dist")
vor = pd.DataFrame()
vor.insert(0, 'vor_area0', vor_area[:, 0])
vor.insert(0, 'face_dist0', vor_dis[:, 0])
vor.insert(0, 'vor_area1', vor_area[:, 1])
vor.insert(0, 'face_dist1', vor_dis[:, 1])

data_title = all_import[
    ['name_number', "x_name", "structure", "structure_type", "space_group", "reference", 'material_id', 'composition',
     'exp_gap', 'group_number']]

data_tail = all_import.drop(
    ['name_number', "x_name", "structure", "structure_type", "space_group", "reference", 'material_id', 'composition',
     'exp_gap', 'group_number'], axis=1)

data_import = data_title.join(vor[["face_dist0", "vor_area0", "face_dist1", "vor_area1"]])
data_import = data_import.join(data_tail)

store.to_csv(data_import, "all_import")
Example #13
    all_import_title = com_data.join(ele_ratio)
    all_import_title = all_import_title.join(depart_elements_table)
    """sub density to e density"""
    select2 = ['electron number_0', 'electron number_1', 'cell volume']
    x_frame = (all_import_title['electron number_0'] +
               all_import_title['electron number_1']
               ) / all_import_title['cell volume']
    all_import_title['cell density'] = x_frame
    all_import_title.rename(columns={'cell density': "electron density"},
                            inplace=True)

    name = [
        "electron density" if i == "cell density" else i
        for i in name_and_abbr[0]
    ]
    abbr = [r"$\rho_e$" if i == r"$\rho_c$" else i for i in name_and_abbr[1]]
    name_and_abbr = [name, abbr]
    dims[-3] = np.array([0, -3, 0, 0, 0, 0, 0])

    store.to_csv(all_import_title, "all_import_title")
    all_import = all_import_title.drop([
        'name_number', "name", "structure", "structure_type",
        "space_group", "reference", 'material_id', 'composition', "com_0",
        "com_1"
    ], axis=1)

    store.to_pkl_pd(dims, "dims")
    store.to_pkl_pd(name_and_abbr, "name_and_abbr")
    store.to_csv(all_import, "all_import")
Example #14
# # # preprocessing
# minmax = MinMaxScaler()
# x = minmax.fit_transform(x)
x_, y_ = shuffle(x, y, random_state=2)

# # # modeling
method_all = ['SVR-set', "GPR-set", "RFR-em", "AdaBR-em", "DTR-em", "LASSO-L1", "BRR-L1"]
methods = method_pack(method_all=method_all,
                      me="reg", gd=True)
pre_y = []
ests = []
for name, methodi in zip(method_all, methods):
    methodi.cv = 5
    methodi.scoring = "neg_root_mean_squared_error"
    gd = methodi.fit(X=x_, y=y_)
    score = gd.best_score_
    est = gd.best_estimator_
    print(name, "neg_root_mean_squared_error", score)
    score = cross_val_score(est, X=x_, y=y_, scoring="r2").mean()
    print(name, "r2", score)
    pre_yi = est.predict(x)
    pre_y.append(pre_yi)
    ests.append(est)
    store.to_pkl_pd(est, name)

pre_y.append(y)
pre_y = np.array(pre_y).T
pre_y = pd.DataFrame(pre_y)
pre_y.columns = method_all + ["real_y"]
store.to_csv(pre_y, "wrtem_result")
Example #15
    pre_y = sl.predict(x)

    r = np.corrcoef(np.vstack((pre_y, y)))[1, 0]

    error = np.mean(np.abs((y - pre_y) / y))

    r2 = sl.score(x, y, "r2")
    mae = sl.score(x, y, "neg_mean_absolute_error")
    sl.loop.cpset.cv = 5
    r2_cv = sl.cv_result(refit=False)
    print("r:{},error:{},r2:{},MAE:{},r2_cv:{}".format(r, error, r2, mae,
                                                       r2_cv[0]))

    data = sl.loop.top_n(20, ascending=False)
    st.end()
    st.to_csv(data, file_new_name="top_n")

# if __name__ == "__main__":
#     pa_factor, pa_dim = Dim.convert_to(10 * 6 * pa)
#     ########### first dataset ###########
#     """data"""
#     com_data = pd.read_csv(r'FCC-BCC.csv')
#     x = com_data.iloc[:, :-1].values
#     y = com_data.iloc[:, -1].values
#     x, y = shuffle(x, y, random_state=0)
#
#     st = Store("FCC-BCC_result_error_no_intercept")
#     st.start()
#     sl = SymbolLearning(loop=r'MultiMutateLoop', cal_dim=True,  pop=5000, dim_type = pa_dim,
#                         gen=50, add_coef=True, re_hall=2,
#                         inter_add=False,batch_size=50,