Example #1
# Method fragment (evidently from a genetic-algorithm flow class); it requires
# `import os`, `import time` and `from featurebox.tools.exports import Store`
# at module level.
def to_csv(self, data_all):
    """Export the collected per-generation data to CSV if self.store is set."""
    if self.store:
        if isinstance(self.store, str):
            path = self.store
        else:
            path = os.getcwd()
        # file name encodes population size, generations, mutate/mate probabilities
        # and a timestamp
        file_new_name = "_".join((str(self.pop), str(self.gen),
                                  str(self.mutate_prob), str(self.mate_prob),
                                  str(time.time())))
        try:
            st = Store(path)
            st.to_csv(data_all, file_new_name)
            print("store data to ", path, file_new_name)
        except (IOError, PermissionError):
            # fall back to the working directory if the target path is not writable
            st = Store(os.getcwd())
            st.to_csv(data_all, file_new_name)
            print("store data to ", os.getcwd(), file_new_name)
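
# A minimal usage sketch (illustrative, not from the original source; assumes an
# instance `flow` of the class above with .pop, .gen, .mutate_prob and .mate_prob set):
#
#     flow.store = r"D:\results"      # a string path, or any truthy value for os.getcwd()
#     flow.to_csv({"gen1_pop0": {"score": 0.87}})
#     # -> writes a CSV named "<pop>_<gen>_<mutate_prob>_<mate_prob>_<timestamp>"
#     #    under flow.store, falling back to the working directory on write errors.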
# @License: BSD 3-Clause
"""
this is a description
"""
import numpy as np
import pandas as pd

from featurebox.selection.corr import Corr
from featurebox.tools.exports import Store
from featurebox.tools.imports import Call
from featurebox.tools.tool import name_to_name

# import seaborn as sns

if __name__ == "__main__":
    store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\2.corr')
    data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp')
    all_import = data.csv().all_import

    name_init, abbr_init = data.pickle_pd().name_and_abbr

    data_import = all_import
    data225_import = data_import.iloc[np.where(
        data_import['group_number'] == 225)[0]]
    X_frame = data225_import.drop(['exp_gap', 'group_number'], axis=1)
    y_frame = data225_import['exp_gap']
    X = X_frame.values
    y = y_frame.values
    """calculate corr"""
    corr = Corr(threshold=0.90, muti_grade=2, muti_index=[2, len(X)])
    corr.fit(X_frame)
#                                   [-1, ["exp", "log","Rec", "Self"]],
#                               ],
#                               definate_variable=[
#                                                  [-3, [0]],
#                                                  [-2, [1]],
#                                                  [-1, [2]]],
#                               operate_linkage=[[-1, -2], ],
#                               # variable_linkage = None
#                               )
#
#     result = mainPart(X, y, pset1, pop_n=500, random_seed=2, cxpb=0.8, mutpb=0.1, ngen=20,
#                        inter_add=True, iner_add=False, random_add=False, score=[explained_variance_score, r2_score])
#     ret = result[2][1]

if __name__ == "__main__":
    store = Store(
        r'C:\Users\Administrator\Desktop\band_gap_exp_last\4.symbollearning')
    data_cluster = Call(
        r'C:\Users\Administrator\Desktop\band_gap_exp_last\1.generate_data',
        r'C:\Users\Administrator\Desktop\band_gap_exp_last\3.MMGS')

    all_import_structure = data_cluster.csv.all_import_structure
    data_import = all_import_structure

    select_gs = [
        'destiny', 'energy cohesive brewer', 'distance core electron(schubert)'
    ]
    select_gs = ['destiny'] + [j + "_%i" % i for j in select_gs[1:] for i in range(2)]
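    # e.g. ['destiny', 'x', 'y'] expands to ['destiny', 'x_0', 'x_1', 'y_0', 'y_1'];
    # the _0/_1 suffixes presumably index the two element sites of the binary compounds.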

    data216_import = data_import.iloc[np.where(
        data_import['group_number'] == 216)[0]]
Example #4
import warnings

import numpy as np
import pandas as pd

from featurebox.tools.exports import Store
from featurebox.tools.imports import Call

warnings.filterwarnings("ignore")

if __name__ == '__main__':
    def get_abbr(X_frame_name):
        element_table = pd.read_excel(r'F:\machine learning\feature_toolbox1.0\featurebox\data\element_table.xlsx',
                                      skiprows=0, index_col=0)
        name = list(element_table.loc["x_name"])
        abbr = list(element_table.loc["abbrTex"])
        name.extend(['face_dist1', 'vor_area1', 'face_dist2', 'vor_area2', "destiny", 'volume', "ele_ratio"])
        abbr.extend(['$d_{vf1}$', '$S_{vf1}$', '$d_{vf2}$', '$S_{vf2}$', r"$\rho_c$", "$V_c$", "$ele_ratio$"])
        index = [name.index(i) for i in X_frame_name]
        abbr = np.array(abbr)[index]
        return abbr


    store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp_last\3.MMGS\3.2')
    data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp_last\1.generate_data',
                r'C:\Users\Administrator\Desktop\band_gap_exp_last\3.MMGS')

    all_import_structure = data.csv.all_import_structure
    data_import = all_import_structure

    select = ['destiny', 'distance core electron(schubert)', 'energy cohesive brewer', 'volume atomic(villars,daams)',
              'radii covalent', 'electronegativity(martynov&batsanov)', 'latent heat of fusion']
    select = ['destiny'] + [j + "_%i" % i for j in select[1:] for i in range(2)]

    data216_import = data_import.iloc[np.where(data_import['group_number'] == 216)[0]]
    data225_import = data_import.iloc[np.where(data_import['group_number'] == 225)[0]]
    data216_225_import = pd.concat((data216_import, data225_import))

    X_frame = data225_import[select]
Example #5
import functools
import random

from deap.algorithms import varAnd
from deap.tools import Logbook

from featurebox.tools.exports import Store

# NOTE: `sub` (the name-substitution helper used below when store=True) is assumed to
# come from the surrounding featurebox code; its exact import is not shown here.


def eaSimple(population,
             toolbox,
             cxpb,
             mutpb,
             ngen,
             stats=None,
             halloffame=None,
             verbose=__debug__,
             pset=None,
             store=True):
    """Genetic-programming main loop (a deap ``eaSimple`` variant that logs every generation).

    Parameters
    ----------
    population : list
        Initial population of individuals.
    toolbox : deap.base.Toolbox
        Toolbox with evaluation, selection and variation operators registered.
    cxpb, mutpb : float
        Crossover and mutation probabilities passed to ``varAnd``.
    ngen : int
        Number of generations.
    stats, halloffame, verbose, pset, store
        Optional statistics object, hall of fame, verbosity flag, primitive set
        (used to map internal names back to feature names) and flag controlling
        whether per-generation data are exported to CSV.

    Returns
    -------
    population, logbook
    """
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = ['gen', 'pop'] + (stats.fields if stats else [])

    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in population if not ind.fitness.valid]

    # fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    fitnesses = toolbox.parallel(iterable=population)
    for ind, fit, in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit[0],
        ind.expr = fit[1]
        ind.dim = fit[2]
        ind.withdim = fit[3]

    add_ind = toolbox.select_kbest_target_dim(population, K_best=0.1 * len_pop)
    if halloffame is not None:
        halloffame.update(add_ind)

    record = stats.compile(population) if stats else {}
    logbook.record(gen=0, nevals=len(population), **record)
    if verbose:
        print(logbook.stream)
    data_all = {}

    # Begin the generational process
    random.setstate(rst)
    for gen in range(1, ngen + 1):
        rst = random.getstate()

        if store:
            rst = random.getstate()
            target_dim = toolbox.select_kbest_target_dim.keywords['dim_type']
            subp = functools.partial(sub,
                                     subed=pset.rep_name_list,
                                     subs=pset.real_name_list)
            data = {
                "gen{}_pop{}".format(gen, n): {
                    "gen": gen,
                    "pop": n,
                    "score": i.fitness.values[0],
                    "expr": str(subp(i.expr)),
                    "with_dim": 1 if i.withdim else 0,
                    "dim_is_target_dim": 1 if i.dim in target_dim else 0,
                    "gen_dim": "{}{}".format(gen, 1 if i.withdim else 0),
                    "gen_target_dim": "{}{}".format(gen, 1 if i.dim in target_dim else 0),
                    "score_dim": i.fitness.values[0] if i.withdim else 0,
                    "score_target_dim": i.fitness.values[0] if i.dim in target_dim else 0,
                }
                for n, i in enumerate(population) if i is not None
            }
            data_all.update(data)
        random.setstate(rst)
        # select_gs the next generation individuals
        offspring = toolbox.select_gs(population, len_pop)

        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        rst = random.getstate()

        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        # fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
        # fitnesses = parallelize(n_jobs=3, func=toolbox.evaluate, iterable=invalid_ind,  respective=False)
        fitnesses = toolbox.parallel(iterable=invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.dim = fit[2]
            ind.withdim = fit[3]

        add_ind = toolbox.select_kbest_target_dim(population,
                                                  K_best=0.1 * len_pop)
        add_ind2 = toolbox.select_kbest_dimless(population,
                                                K_best=0.2 * len_pop)
        add_ind3 = toolbox.select_kbest(population, K_best=5)
        offspring += add_ind
        offspring += add_ind2
        offspring += add_ind3

        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(add_ind)

            if len(halloffame.items
                   ) > 0 and halloffame.items[-1].fitness.values[0] >= 0.95:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        # Replace the current population by the offspring
        population[:] = offspring

        # Append the current generation statistics to the logbook
        record = stats.compile(population) if stats else {}
        logbook.record(gen=gen, nevals=len(population), **record)
        if verbose:
            print(logbook.stream)

        random.setstate(rst)

    store = Store()
    store.to_csv(data_all)
    return population, logbook
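

# --- Illustrative sketch (not part of the original example) -------------------
# eaSimple above expects a deap toolbox with custom registrations such as
# "parallel", "select_gs" and "select_kbest_target_dim".  The selector below is a
# hypothetical placeholder; what the sketch shows is the real deap mechanic the
# loop relies on: Toolbox.register wraps the callable in functools.partial, so a
# bound keyword such as dim_type stays readable via `.keywords`.
if __name__ == "__main__":
    from deap import base

    def select_kbest_target_dim(population, dim_type=None, K_best=10):
        # placeholder: keep the K_best highest-scoring individuals (dim check omitted)
        return sorted(population, key=lambda ind: ind.fitness.values[0],
                      reverse=True)[:int(K_best)]

    toolbox = base.Toolbox()
    toolbox.register("select_kbest_target_dim", select_kbest_target_dim,
                     dim_type="dless")
    # eaSimple reads the bound keyword back exactly like this:
    print(toolbox.select_kbest_target_dim.keywords["dim_type"])  # -> dless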
Example #6
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn import svm
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.preprocessing import MinMaxScaler

from featurebox.selection.backforward import BackForward
from featurebox.tools.exports import Store
from featurebox.tools.imports import Call

warnings.filterwarnings("ignore")

# data import
store = Store(r'/data/home/wangchangxin/data/zlj/')
data = Call(r'/data/home/wangchangxin/data/zlj/', index_col=None)
all_import = data.xlsx().data

x_name = all_import.index.values
y = all_import["y"].values
x_frame = all_import.drop("y", axis=1)
x = x_frame.values
# # preprocessing
# minmax = MinMaxScaler()
# x = minmax.fit_transform(x)
# data split
xtrain, xtest = x[3:], x[:3]
ytrain, ytest = y[3:], y[:3]

xtrain, ytrain = sklearn.utils.shuffle(xtrain, ytrain, random_state=3)
Example #7
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from featurebox.selection.corr import Corr
from featurebox.tools.exports import Store
from featurebox.tools.imports import Call
from featurebox.tools.show import corr_plot
from featurebox.tools.tool import name_to_name

# import seaborn as sns

if __name__ == "__main__":
    store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\2.corr')
    data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp')
    all_import = data.csv().all_import
    # name_init, abbr_init = data.name_and_abbr
    name_init, abbr_init = data.pickle_pd().name_and_abbr

    data_import = all_import
    data225_import = data_import.iloc[np.where(data_import['group_number'] == 225)[0]]
    X_frame = data225_import.drop(['exp_gap', 'group_number'], axis=1)
    y_frame = data225_import['exp_gap']
    X = X_frame.values
    y = y_frame.values

    """calculate corr"""
    corr = Corr(threshold=0.90, muti_grade=2, muti_index=[2, len(X)])
    corr.fit(X_frame)
Example #8
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
# from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle

from featurebox.selection.quickmethod import method_pack
from featurebox.tools.exports import Store

# data import

store = Store(r'/data/home/wangchangxin/data/wr/tem')

data = pd.read_excel(r'/data/home/wangchangxin/data/wr/tem/wrtem2.xlsx',
                     header=0,
                     skiprows=None,
                     index_col=0)

y = data["S"].values
x_p_name = ["t", 'v', 'hat']
x = data[x_p_name].values

# # # preprocessing
# minmax = MinMaxScaler()
# x = minmax.fit_transform(x)
x_, y_ = shuffle(x, y, random_state=2)

# # # modeling
method_all = [
    'SVR-set', "GPR-set", "RFR-em", "AdaBR-em", "DTR-em", "LASSO-L1", "BRR-L1"
]
Example #9
# @Time   : 2019/6/13 21:04
# @Author : Administrator
# @Project : feature_toolbox
# @FileName: 1.1add_compound_features.py
# @Software: PyCharm

import pandas as pd
import pymatgen as mg

from featurebox.featurizers.voronoifeature import count_voronoinn
from featurebox.tools.exports import Store
from featurebox.tools.imports import Call
"""
this is a description
"""
store = Store(
    r'C:\Users\Administrator\Desktop\band_gap_exp_last\1.generate_data')
data = Call(
    r'C:\Users\Administrator\Desktop\band_gap_exp_last\1.generate_data')

com_data = pd.read_excel(
    r'C:\Users\Administrator\Desktop\band_gap_exp_last\init_band_data.xlsx',
    sheet_name='binary_4_structure',
    header=0,
    skiprows=None,
    index_col=0,
    names=None)
composition = pd.Series(map(eval, com_data['composition']))
composition_mp = pd.Series(map(mg.Composition, composition))
"""for element site"""
com_mp = pd.Series([i.to_reduced_dict for i in composition_mp])
# com_mp = composition_mp
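# Note (illustrative): Composition.to_reduced_dict gives an element -> amount mapping
# for the reduced formula, e.g. mg.Composition("CsCl").to_reduced_dict == {"Cs": 1.0, "Cl": 1.0}.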
Example #10
# -*- coding: utf-8 -*-

# @Time    : 2019/12/20 15:11
# @Email   : [email protected]
# @Software: PyCharm
# @License: BSD 3-Clause
from featurebox.tools.exports import Store
from featurebox.tools.imports import Call

store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\4.symbol', )
data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp\4.symbol')
store.to_csv(data.filename)
Example #11
import warnings

import numpy as np
import pandas as pd
from sklearn import preprocessing, utils
from sklearn.model_selection import GridSearchCV

from featurebox.selection.ugs import UGS
from featurebox.tools.exports import Store
from featurebox.tools.imports import Call
from featurebox.tools.quickmethod import dict_method_reg
from featurebox.tools.tool import name_to_name

warnings.filterwarnings("ignore")

if __name__ == '__main__':
    store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\3.sum')
    data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp')
    data_import = data.csv.all_import
    name_init, abbr_init = data.csv.name_and_abbr

    select = ['volume', 'destiny', 'lattice constants a', 'lattice constants c', 'radii covalent',
              'radii ionic(shannon)',
              'distance core electron(schubert)', 'latent heat of fusion', 'energy cohesive brewer', 'total energy',
              'charge nuclear effective(slater)', 'valence electron number', 'electronegativity(martynov&batsanov)',
              'volume atomic(villars,daams)']

    select = ['volume', 'destiny'] + [j + "_%i" % i for j in select[2:] for i in range(2)]

    data216_import = data_import.iloc[np.where(data_import['group_number'] == 216)[0]]
    data225_import = data_import.iloc[np.where(data_import['group_number'] == 225)[0]]
    data216_225_import = pd.concat((data216_import, data225_import))
Example #12
from featurebox.symbol.calculation.translate import group_str
from featurebox.symbol.flow import MutilMutateLoop
from featurebox.symbol.preprocess import MagnitudeTransformer
from featurebox.tools.exports import Store
from featurebox.tools.imports import Call

if __name__ == "__main__":
    import os

    os.chdir(r'band_gap')

    data = Call()
    all_import = data.csv().all_import
    name_and_abbr = data.csv().name_and_abbr

    store = Store()

    data_import = all_import
    data225_import = data_import

    cal = []

    # NOTE: `dless` used in select_unit below (a dimensionless marker) is assumed to be
    # provided by featurebox's dimension utilities; its import is not shown in this fragment.
    from sympy.physics.units import eV
    select = [
        'electronegativity(martynov&batsanov)', 'fusion enthalpy',
        'valence electron number'
    ]
    select_unit = [dless, eV, dless]
    cal.append((select, select_unit))

    from sympy.physics.units import eV, pm
Example #13
import numpy as np
import pandas as pd
from pymatgen import Composition

from featurebox.featurizers.compositionfeaturizer import DepartElementFeaturizer
from featurebox.tools.exports import Store
"""
this is a description
"""
if __name__ == "__main__":

    import os

    os.chdir(r'band_gap')

    store = Store()

    com_data = pd.read_excel(r'initial_band_gap_data.xlsx')
    #
    # """for element site"""
    from featurebox.data.impot_element_table import element_table

    name_and_abbr = element_table.iloc[[0, 1], :]
    element_table = element_table.iloc[2:, :]

    #
    #
    feature_select = [
        'lattice constants a',
        'lattice constants b',
        'lattice constants c',
Example #14
from featurebox.symbol.flow import MutilMutateLoop, OnePointMutateLoop
from featurebox.symbol.preprocess import MagnitudeTransformer
from featurebox.tools.exports import Store
from featurebox.tools.imports import Call
from featurebox.tools.tool import tt

if __name__ == "__main__":
    import os

    os.chdir(r'band_gap')

    data = Call()
    all_import = data.csv().all_import
    name_and_abbr = data.csv().name_and_abbr

    store = Store()

    data_import = all_import
    data225_import = data_import

    select = [
        'cell volume', 'cell density', 'lattice constants a',
        'lattice constants c', 'covalent radii', 'ionic radii(shannon)',
        'core electron distance(schubert)', 'fusion enthalpy',
        'cohesive energy(Brewer)', 'total energy',
        'effective nuclear charge(slater)', 'valence electron number',
        'electronegativity(martynov&batsanov)', 'atomic volume(villars,daams)'
    ]
    from sympy.physics.units import eV, pm, nm

    select_unit = [
Example #15
import random

from deap.algorithms import varAnd
from deap.tools import Logbook

from featurebox.tools.exports import Store


def eaSimple(population,
             toolbox,
             cxpb,
             mutpb,
             ngen,
             stats=None,
             halloffame=None,
             verbose=__debug__,
             pset=None,
             store=True):
    """Genetic-programming main loop (deap ``eaSimple`` variant with elitism).

    Parameters
    ----------
    population, toolbox, cxpb, mutpb, ngen
        Initial population, deap toolbox, crossover/mutation probabilities and
        number of generations.
    stats, halloffame, verbose, pset, store
        Optional statistics, hall of fame, verbosity flag, primitive set and
        store flag.

    Returns
    -------
    population, logbook
    """
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = [] + (stats.fields if stats else [])
    data_all = {}
    random.setstate(rst)

    for gen in range(1, ngen + 1):
        "evaluation"
        rst = random.getstate()
        """score"""
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        fitnesses = toolbox.parallel(iterable=population)
        for ind, fit, in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.y_dim = fit[2]
            ind.withdim = fit[3]
        random.setstate(rst)

        rst = random.getstate()
        """elite"""
        add_ind = []
        add_ind1 = toolbox.select_kbest_target_dim(population,
                                                   K_best=0.05 * len_pop)
        add_ind += add_ind1
        elite_size = len(add_ind)
        random.setstate(rst)

        rst = random.getstate()
        """score"""

        random.setstate(rst)

        rst = random.getstate()
        """record"""
        if halloffame is not None:
            halloffame.update(add_ind1)
            if len(halloffame.items
                   ) > 0 and halloffame.items[-1].fitness.values[0] >= 0.9999:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        random.setstate(rst)

        rst = random.getstate()
        """Dynamic output"""

        record = stats.compile_(population) if stats else {}
        logbook.record(gen=gen, pop=len(population), **record)

        if verbose:
            print(logbook.stream)
        random.setstate(rst)
        """crossover, mutate"""
        offspring = toolbox.select_gs(population, len_pop - elite_size)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        rst = random.getstate()
        """re-run"""
        offspring.extend(add_ind)
        population[:] = offspring
        random.setstate(rst)

    store = Store()
    store.to_csv(data_all)
    return population, logbook
Example #16
#                         definate_variable=[[-5, [0]],
#                                            [-4, [1]],
#                                            [-3, [2]],
#                                            [-2, [3]],
#                                            [-1, [4]]],
#                         operate_linkage=[[-1, -2], [-3, -4]],
#                         variable_linkage=None)
#     result = mainPart(X, y, pset, pop_n=500, random_seed=2, cxpb=0.8, mutpb=0.5, ngen=20, tournsize=3, max_value=10,
#                       double=False, score=[r2_score, custom_loss_func], target_dim=target_dim)

#
##1
if __name__ == '__main__':
    import pandas as pd

    from featurebox.tools.exports import Store
    from featurebox.tools.imports import Call
    from featurebox.tools.tool import name_to_name

    store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\4.symbol')
    data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp')
    data_import = data.csv().all_import
    name_init, abbr_init = data.pickle_pd().name_and_abbr

    select = [
        'destiny', 'valence electron number',
        'distance core electron(schubert)'
    ]

    X_frame_abbr = name_to_name(name_init,
                                abbr_init,
                                search=select,
                                search_which=1,
                                return_which=2,
                                two_layer=False)
Example #17
    # Fragment from inside a run function; Statistics and MultiStatistics come from
    # deap.tools, while eaSimple, Store and Call are provided by the surrounding code.
    stats1 = Statistics(lambda ind: ind.fitness.values[0])
    stats1.register("max", np.max)

    stats2 = Statistics(lambda ind: 0 if ind else 0)
    stats2.register("countable_number", np.sum)
    stats = MultiStatistics(score1=stats1, score2=stats2)

    population, logbook = eaSimple(pop, toolbox, cxpb=cxpb, mutpb=mutpb, ngen=ngen, stats=stats,
                                   halloffame=hof, pset=pset, store=store)

    return hof


if __name__ == '__main__':
    # input
    store = Store(r'D:\sy')
    data = Call(r'D:\sy')

    data_import = data.xlsx().featuredata
    name_abbr = data_import.columns.values
    x_name = name_abbr[:-1]
    # data_import = data_import.iloc[np.where(data_import['f1'] <= 1)[0]]

    X_frame = data_import[x_name]
    y_frame = data_import['y']

    X = X_frame.values
    y = y_frame.values

    # processing
    # scal = preprocessing.MinMaxScaler()
Example #18
import warnings

import numpy as np
import pandas as pd
from sklearn import preprocessing, utils
from sklearn.model_selection import GridSearchCV

from featurebox.selection.quickmethod import dict_method_reg
from featurebox.selection.sum import SUM
from featurebox.tools.exports import Store
from featurebox.tools.imports import Call
from featurebox.tools.tool import name_to_name

warnings.filterwarnings("ignore")

if __name__ == '__main__':
    store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\3.sum\10times100')
    data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp\3.sum\method',
                r"C:\Users\Administrator\Desktop\band_gap_exp\3.sum\10times100",
                r'C:\Users\Administrator\Desktop\band_gap_exp')
    data_import = data.csv().all_import
    name_init, abbr_init = data.pickle_pd().name_and_abbr

    select = ['volume', 'destiny', 'lattice constants a', 'lattice constants c', 'radii covalent',
              'radii ionic(shannon)',
              'distance core electron(schubert)', 'latent heat of fusion', 'energy cohesive brewer', 'total energy',
              'charge nuclear effective(slater)', 'valence electron number', 'electronegativity(martynov&batsanov)',
              'volume atomic(villars,daams)']

    select = ['volume', 'destiny'] + [j + "_%i" % i for j in select[2:] for i in range(2)]

    data216_import = data_import.iloc[np.where(data_import['group_number'] == 216)[0]]
Example #19
import functools
import random

from deap.algorithms import varAnd
from deap.tools import Logbook

from featurebox.tools.exports import Store

# NOTE: `sub` (the name-substitution helper used below when store=True) is assumed to
# come from the surrounding featurebox code; its exact import is not shown here.


def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
             halloffame=None, verbose=__debug__, pset=None, store=True):
    """Genetic-programming main loop (deap ``eaSimple`` variant with elitism).

    Parameters
    ----------
    population, toolbox, cxpb, mutpb, ngen
        Initial population, deap toolbox, crossover/mutation probabilities and
        number of generations.
    stats, halloffame, verbose, pset, store
        Optional statistics, hall of fame, verbosity flag, primitive set and
        flag controlling CSV export of per-generation data.

    Returns
    -------
    population, logbook
    """
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = [] + (stats.fields if stats else [])
    data_all = {}
    random.setstate(rst)

    for gen in range(1, ngen + 1):
        "evaluation"
        rst = random.getstate()
        """score"""
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        fitnesses = toolbox.parallel(iterable=population)
        for ind, fit, in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.dim = fit[2]
            ind.withdim = fit[3]
        random.setstate(rst)

        rst = random.getstate()
        """elite"""
        add_ind = []
        add_ind1 = toolbox.select_kbest_target_dim(population, K_best=0.01 * len_pop)
        add_ind2 = toolbox.select_kbest_dimless(population, K_best=0.01 * len_pop)
        add_ind3 = toolbox.select_kbest(population, K_best=5)
        add_ind += add_ind1
        add_ind += add_ind2
        add_ind += add_ind3
        elite_size = len(add_ind)
        random.setstate(rst)

        rst = random.getstate()
        """score"""
        if store:
            subp = functools.partial(sub, subed=pset.rep_name_list, subs=pset.real_name_list)
            data = {"gen{}_pop{}".format(gen, n): {"gen": gen, "pop": n,
                                                   "score": i.fitness.values[0],
                                                   "expr": str(subp(i.expr)),
                                                   } for n, i in enumerate(population) if i is not None}
            data_all.update(data)
        random.setstate(rst)

        rst = random.getstate()
        """record"""
        if halloffame is not None:
            halloffame.update(add_ind3)
            if len(halloffame.items) > 0 and halloffame.items[-1].fitness.values[0] >= 0.95:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        random.setstate(rst)

        rst = random.getstate()
        """Dynamic output"""

        record = stats.compile(population) if stats else {}
        logbook.record(gen=gen, pop=len(population), **record)

        if verbose:
            print(logbook.stream)
        random.setstate(rst)

        """crossover, mutate"""
        offspring = toolbox.select_gs(population, len_pop - elite_size)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        rst = random.getstate()
        """re-run"""
        offspring.extend(add_ind)
        population[:] = offspring
        random.setstate(rst)

    store = Store()
    store.to_csv(data_all)
    return population, logbook
Example #20
# @Software: PyCharm

"""

"""

import numpy as np
import pandas as pd
import sympy
from featurebox.combination.symbolbase import calculateExpr, getName

from featurebox.tools.exports import Store
from featurebox.tools.imports import Call

if __name__ == "__main__":
    store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp_last\4.symbollearning')
    data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp_last\1.generate_data',
                r'C:\Users\Administrator\Desktop\band_gap_exp_last\3.MMGS',
                r'C:\Users\Administrator\Desktop\band_gap_exp_last\2.correction_analysis')

    all_import_structure = data.csv.all_import_structure
    data_import = all_import_structure
    data216_import = data_import.iloc[np.where(data_import['group_number'] == 216)[0]]
    data225_import = data_import.iloc[np.where(data_import['group_number'] == 225)[0]]
    data221_import = data_import.iloc[np.where(data_import['group_number'] == 221)[0]]
    data216_225_221import = pd.concat((data216_import, data225_import, data221_import))

    list_name = data.csv.list_name
    list_name = list_name.values.tolist()
    list_name = [[i for i in _ if isinstance(i, str)] for _ in list_name]
    # grid = itertools.product(list_name[2],list_name[12],list_name[32])


# NOTE: the original `def` line is missing from this fragment; the signature below is
# inferred from the call `get_ids(api_key=..., name_list=list1)` further down.
# MPRester (pymatgen's Materials Project client) must also be imported, e.g.
# `from pymatgen import MPRester` in pymatgen versions of this era.
def get_ids(api_key, name_list=None):
    m = MPRester(api_key)
    ids = m.query(criteria={
        # 'pretty_formula': {"$in": name_list},
        'nelements': {"$lt": 5, "$gt": 3},
        # 'spacegroup.number': {"$in": [225]},
        'nsites': {"$lt": 20},
        'formation_energy_per_atom': {"$lt": 0},
        # "elements": {"$in": ["Al", "Co", "Cr", "Cu", "Fe", 'Ni'], "$all": "O"},
        # "elements": {"$in": list(combinations(["Al", "Co", "Cr", "Cu", "Fe", 'Ni'], 5))}
    }, properties=["material_id"])
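    # `criteria` uses the MongoDB-style query syntax accepted by MPRester.query:
    # "$lt"/"$gt" bound the numeric fields, and "$in" (commented out above) matches a list.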
    print("number %s" % len(ids))
    return ids


if __name__ == "__main__":
    list1 = list(
        ['CsCl', 'CsBr', 'CsI', 'CsSb', 'LiF', 'KF', 'RbF', 'CsF', 'MgO', 'CdO', 'MnO', 'VO', 'CaO', 'SrO', 'BaO',
         'EuO', 'ScN', 'YN', 'ErN', 'HoN', 'DyN', 'GdN', 'EuN', 'CeN', 'LiCl', 'TlCl', 'AgCl', 'NaCl', 'RbCl', 'LiBr',
         'TlBr', 'AgBr', 'NaBr', 'KBr', 'RbBr', 'MgSe', 'PbSe', 'CaSe', 'SrSe', 'BaSe', 'YbSe', 'EuSe', 'SmSe', 'PbS',
         'MnS', 'CaS', 'SrS', 'BaS', 'YbS', 'EuS', 'SmS', 'LiI', 'TlI', 'NaI', 'KI', 'RbI', 'YbAs', 'TmAs', 'DyAs',
         'GdAs', 'NdAs', 'SmAs', 'PrAs', 'SmP', 'AsTe', 'GeTe', 'SnTe', 'PbTe', 'CaTe', 'SrTe', 'BaTe', 'YbTe', 'ErTe',
         'GdTe', 'EuTe', 'SmTe', 'LaSb', 'YbSb', 'SmSb', 'PrSb', 'NaF', 'KCl', 'CuBr', 'BeSe', 'ZnSe', 'CdSe', 'HgSe',
         'BeS', 'ZnS', 'CdS', 'AlAs', 'AlP', 'BeTe', 'ZnTe', 'CdTe', 'HgTe', 'AlSb', 'BN', 'SiC3c', 'GaAs', 'InAs',
         'BP', 'GaP', 'InP', 'GaSb', 'InSb', 'CuCl', 'HgS', 'CuI', 'MnTe', 'AgI', 'ZnS', 'ZnSe', 'ZnO', 'AlN', 'GaN',
         'MgTe', 'BeO', 'BN', 'InN', 'SiC', 'MnS'])
    idss = get_ids(api_key="Di2IZMunaeR8vr9w", name_list=list1)
    idss1 = [i['material_id'] for i in idss]
    # data_fetcher: a helper defined elsewhere (not shown here), which apparently
    # downloads structure data for the given Materials Project ids.
    dff = data_fetcher("Di2IZMunaeR8vr9w", idss1, elasticity=False)
    st = Store(r"C:\Users\Administrator\Desktop")
    st.to_csv(dff, "id_structure")
Example #22
# @Project : feature_toolbox
# @FileName: 3.0select_method.py
# @Software: PyCharm
import numpy as np
import pandas as pd
from pymatgen import Composition

from featurebox.featurizers.compositionfeaturizer import DepartElementFeaturizer
from featurebox.tools.exports import Store

"""
this is a description
"""
if __name__ == "__main__":

    store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp')

    com_data = pd.read_excel(r'C:\Users\Administrator\Desktop\band_gap_exp\init_band_data.xlsx',
                             sheet_name='binary_4_structure')

    """for element site"""
    element_table = pd.read_excel(r'F:\machine learning\feature_toolbox1.0\featurebox\data\element_table.xlsx',
                                  header=4, skiprows=0, index_col=0)
    """get x_name and abbr"""


    def get_abbr():
        abbr = list(element_table.loc["abbrTex"])
        name = list(element_table.columns)
        name.extend(['face_dist1', 'vor_area1', 'face_dist2', 'vor_area2', "destiny", 'volume', "ele_ratio"])
        abbr.extend(['$d_{vf1}$', '$S_{vf1}$', '$d_{vf2}$', '$S_{vf2}$', r"$\rho_c$", "$V_c$", "$ele_ratio$"])
Example #23
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle

from featurebox.selection.quickmethod import method_pack
from featurebox.tools.exports import Store

# data import

store = Store(r'C:\Users\Administrator\Desktop/wr')

# """for element site"""
data = pd.read_excel(r'C:\Users\Administrator\Desktop/wr/wrpvc.xlsx',
                     header=0, skiprows=None, index_col=0)

y = data["t"].values
x_p_name = ['t_t', 'v', 'b', 'hat', 'd', 't1']
x = data[x_p_name].values

x_name = ["温度刻度", "速度刻度", "风量", "加盖", "焊口距地距离", "焊接前出风口温度"]
# English glosses: temperature setting, speed setting, air volume, cover on,
# weld-to-ground distance, outlet air temperature before welding.

# # # preprocessing
minmax = MinMaxScaler()
x = minmax.fit_transform(x)

x, y = shuffle(x, y)
# m_corr = Corr(threshold=0.85, muti_grade=None, muti_index=None, must_index=None)
# m_corr.fit(x)
# corr = m_corr.cov

#
import warnings

from sklearn import utils
from sklearn.model_selection import GridSearchCV

from featurebox.selection.exhaustion import Exhaustion
from featurebox.selection.quickmethod import dict_method_reg
from featurebox.tools.exports import Store
from featurebox.tools.imports import Call
from featurebox.tools.show import BasePlot
from featurebox.tools.tool import name_to_name

warnings.filterwarnings("ignore")
"""
this is a description
"""
if __name__ == "__main__":
    store = Store(
        r'C:\Users\Administrator\Desktop\band_gap_exp\3.sum\method', )
    data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp')
    data_import = data.csv().all_import
    name_init, abbr_init = data.pickle_pd().name_and_abbr

    select = [
        'cell volume', 'electron density', 'lattice constants a',
        'lattice constants c', 'radii covalent', 'radii ionic(shannon)',
        'distance core electron(schubert)', 'latent heat of fusion',
        'energy cohesive brewer', 'total energy',
        'charge nuclear effective(slater)', 'valence electron number',
        'electronegativity(martynov&batsanov)', 'volume atomic(villars,daams)'
    ]

    select = [
        'cell volume',
Example #25
import pandas as pd
from pymatgen import Composition
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.feature_selection import RFECV
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeRegressor

from featurebox.featurizers.compositionfeaturizer import WeightedAverage
from featurebox.selection.corr import Corr
from featurebox.tools.exports import Store
from featurebox.tools.imports import Call

# data import
store = Store(r'C:\Users\Administrator\Desktop\skk')
data = Call(r'C:\Users\Administrator\Desktop\skk')
all_import = data.csv().skk

# """for element site"""
element_table = pd.read_excel(
    r'C:\Users\Administrator\Desktop\band_gap_exp\element_table.xlsx',
    header=4,
    skiprows=0,
    index_col=0)
element_table = element_table.iloc[5:, 7:]

# fetch additional data
feature_select = [
    'lattice constants a',
    'lattice constants b',