'Ti', 'Ta', 'Hf', 'Re', 'V', 'B', 'N', 'O', 'S', 'Zr' ] df[ele] = df[ele].fillna(0) df = df.dropna(subset=[ 'CT_RT', 'CT_CS', 'CT_EL', 'CT_RA', 'CT_Temp', 'Normal', 'Temper1', 'AGS No.', 'CT_MCR' ]) df['log_CT_CS'] = np.log(df['CT_CS']) df['log_CT_MCR'] = np.log(df['CT_MCR']) features = [i for i in df.columns if i not in ['CT_CS', 'CT_MCR', 'ID']] X = df[features].to_numpy(np.float32) #print(X.shape) y = df['CT_RT'].to_numpy(np.float32) pdata = ProcessData(X=X, y=y, features=features) #pdata.clean_data(scale_strategy={'strategy': 'power_transform', # 'method': 'yeo-johnson'}) pdata.clean_data(scale_strategy={'strategy': 'MinMaxScaler'}) data = pdata.get_data() scale = pdata.scale #X = scale.inverse_transform(data['X']) X = data['X'] features = data['features'] del data, pdata, df, y, ID, ele, model ae = AutoEncoder(arch=[33, 22, 11], X=X, loss='mse', epochs=250) ae.build_model() X_pred = ae.predict(X) print(scale.inverse_transform(X)[0], scale.inverse_transform(X_pred)[0])
# Drop every row that has a missing value in any of the listed columns.
df = df.dropna(subset=['CT_RT', 'CT_CS', 'CT_EL', 'CT_RA', 'CT_Temp',
                       'Normal', 'Temper1', 'AGS No.', 'CT_MCR'])

# Natural-log transforms of CT_CS and CT_MCR, stored as extra columns.
df['log_CT_CS'] = np.log(df['CT_CS'])
df['log_CT_MCR'] = np.log(df['CT_MCR'])

# Regression target: 1e-3 * CT_Temp * (ln(CT_RT) + 25), evaluated row by row.
# NOTE(review): presumably a Larson-Miller-style parameter with C = 25 -- confirm.
df['LMP_Model'] = df.apply(lambda x: 1e-3 * x['CT_Temp'] *
                           (np.log(x['CT_RT']) + 25), axis=1)

# Feature columns: everything except the identifier, the target, the raw
# quantities the target is built from, and the raw CT_CS / CT_MCR columns
# (their log-transformed versions remain in the feature set).
features = [i for i in df.columns if i not in
            ['CT_RT', 'CT_Temp', 'ID', 'CT_CS', 'LMP_Model', 'CT_MCR']]

# Keep only rows with CT_RT below 200000.
df = df[df['CT_RT'] < 200000]

X = df[features].to_numpy(np.float32)
y = df['LMP_Model'].to_numpy(np.float32)

# Per-row [ID, CT_RT, CT_Temp, CT_CS] records handed to ProcessData next to
# X and y and read back from data['y2'] below.
# NOTE(review): presumably so they stay aligned with whatever rows
# clean_data() keeps -- confirm against ProcessData.
y2 = df[['ID', 'CT_RT', 'CT_Temp', 'CT_CS']].values.tolist()

pdata = ProcessData(X=X, y=y, y2=y2, features=features)
pdata.clean_data()
data = pdata.get_data()
# Hold on to the object's `scale` attribute before the object is deleted.
scale = pdata.scale
del pdata

# Unpack the y2 records into separate per-column containers
# (positions follow the column order used to build y2 above).
CT_RT = np.array([i[1] for i in data['y2']])
CT_Temp = np.array([i[2] for i in data['y2']])
CT_CS = np.array([i[3] for i in data['y2']])
ID = [i[0] for i in data['y2']]
# One constant 25 per record -- the same constant used in LMP_Model above.
C = np.array([25 for i in ID])

# Set up the SKREG wrapper with the "LR" estimator and "3-Fold" validation.
skreg = SKREG(X=data['X'], y=data['y'], estimator="LR", validation="3-Fold",
import seaborn as sns
from xmat_pnnl_code import ProcessData
from xmat_pnnl_code import SKGP
from sklearn.gaussian_process.kernels import (RBF, WhiteKernel, DotProduct,
                                              Matern)

# Load the features.
df = pd.read_csv('../../9_12_Cr.csv')
df = df.dropna(how='all')
df = df.dropna(subset=['RT', 'CS', 'CT Temp'])
df['log_CS'] = np.log(df['CS'])

# Every column except the identifier, the target (RT) and the raw CS column
# (its natural log is kept as a feature).  Computed once so that the matrix
# columns and the names handed to ProcessData cannot drift apart.
feature_names = [col for col in df.columns if col not in ('ID', 'RT', 'CS')]

# np.float was only an alias of the builtin float and was removed in
# NumPy 1.24; np.float64 is the same dtype and works on every version.
X = df[feature_names].to_numpy(dtype=np.float64)
y = df['RT'].to_numpy()

# Bind the helper to `pdata`, not `pd`: rebinding (and then deleting) `pd`
# would throw away the pandas module alias for the rest of the script.
pdata = ProcessData(X=X, y=y, metadata=feature_names)
pdata.clean_data()
X, y, metadata = pdata.get_data()
del pdata

# Composite kernel: RBF + white noise + dot-product + Matern, all with unit
# initial hyper-parameters.
kernel = 1.0 * RBF(length_scale=1.0) + WhiteKernel(noise_level=1.0)
kernel += 1.0 * DotProduct(sigma_0=1.0) + 1.0 * Matern(length_scale=1.0)

skgp = SKGP(X=X, y=y, kernel=kernel, validation='5-Fold')
skgp.run_GP()

# Store the feature names alongside the run state, then print and persist
# the whole state dictionary.  np.save pickles the dict, so it has to be
# read back with np.load(..., allow_pickle=True).
run_state = vars(skgp)
run_state['features'] = metadata
print(run_state)
np.save('gp_run.npy', run_state)

skgp.plot_parity(data='train', err_bar=True).savefig('train_parity_plot.png')
skgp.plot_parity(data='test', err_bar=True).savefig('test_parity_plot.png')
df = pd.read_csv(path)
# 'ND' entries are treated as missing values.
df = df.replace('ND', np.nan)

# Alloys to keep, given by number and expanded to zero-padded IDs such as
# '9Cr-001'.
ID = [1, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
      43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
      58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
      75, 76, 77, 78, 79, 82]
ID = ['9Cr-{}'.format(str(i).zfill(3)) for i in ID]

# .copy() makes the filtered frame independent of the original so the column
# assignments below do not trigger pandas' chained-assignment warning.
df = df[df['ID'].isin(ID)].copy()
df['log_CT_CS'] = np.log(df['CT_CS'])

# Feature columns are fixed here, BEFORE the target column is added.  The
# names passed to ProcessData therefore match the columns of X one-to-one and
# never contain 'LMP' (previously the name list was rebuilt after 'LMP' had
# been added, leaving it one entry longer than X is wide).
feature_names = [col for col in df.columns
                 if col not in ('ID', 'CT_RT', 'CT_Temp', 'CT_CS')]

# np.float was removed in NumPy 1.24; np.float64 is the identical dtype.
X = df[feature_names].to_numpy(dtype=np.float64)

# Target: 1e-3 * CT_Temp * (ln(CT_RT) + C), where C is looked up per alloy in
# the externally defined `data` mapping (data[<ID>]['C']).
df['LMP'] = df.apply(
    lambda row: 1e-3 * row['CT_Temp'] *
    (np.log(row['CT_RT']) + data[row['ID']]['C']),
    axis=1)
y = df['LMP'].to_numpy()

dprocess = ProcessData(X=X, y=y, metadata=feature_names)
dprocess.clean_data()
X, y, metadata = dprocess.get_data()
del dprocess

# Rebuild a frame from the cleaned arrays, with the target as the last column.
df = pd.DataFrame(X, columns=metadata)
df['LMP'] = y

# Initialize the model.
bframe = BayesFrame(df=df,
                    target="LMP",
                    val_scheme='5-Fold',
                    bic_scheme="per_n",
                    model_scheme=["selection"])

# Print and persist the best model.
print(bframe.zoo)
np.save('best_model.npy', bframe.zoo)
df = pd.read_csv(path + '/Cleaned_data.csv')

# Keep only the rows whose ID is in the externally defined `ID` collection.
# .copy() detaches the result from the original frame so the column
# assignments below do not trigger pandas' chained-assignment warning.
df = df[df['ID'].isin(ID)].copy()

df['log_CT_CS'] = np.log(df['CT_CS'])

# Target: 1e-3 * CT_Temp * (ln(CT_RT) + 25), evaluated row by row.
df['LMP_Model'] = df.apply(
    lambda row: 1e-3 * row['CT_Temp'] * (np.log(row['CT_RT']) + 25),
    axis=1)

# Features: every column except the identifier, the target, and the raw
# quantities the target / log feature are derived from.
excluded = ('CT_RT', 'CT_Temp', 'ID', 'CT_CS', 'LMP_Model')
features = [col for col in df.columns if col not in excluded]

X = df[features].to_numpy(np.float32)
y = df['LMP_Model'].to_numpy(np.float32)

# Bind the helper to `pdata`, not `pd`: rebinding and then deleting `pd`
# would discard the pandas module alias for everything that follows.
pdata = ProcessData(X=X, y=y, metadata=features)
pdata.clean_data()
X, y, metadata = pdata.get_data()
del pdata

# Hyper-parameter grid: only `alpha` is varied, every other setting is pinned
# to a single value.
param_space = {
    'max_iter': [5000],
    'activation': ['relu'],
    'solver': ['lbfgs'],
    'alpha': [
        0.001, 0.002, 0.003, 0.004, 0.005, 0.006,
        0.01, 0.02, 0.03, 0.04, 0.05, 0.06,
    ],
    'learning_rate': ['constant'],
}
df = pd.read_csv(path + '/Cleaned_data.csv')

# Element columns: a missing value is replaced with 0.
ele = ["Fe", "C", "Cr", "Mn", "Si", "Ni", "Co", "Mo", "W", "Nb", "Al", "P",
       "Cu", "Ti", "V", "B", "N", "S"]
df[ele] = df[ele].fillna(0)

# Drop every row that has a missing value in any of the listed columns.
df = df.dropna(subset=['CT_RT', 'CT_CS', 'CT_EL', 'CT_RA', 'CT_Temp',
                       'AGS No.'])

# Natural-log transforms stored as extra columns.
# NOTE(review): CT_MCR is not part of the dropna subset above, so
# log_CT_MCR may contain NaN -- confirm this is handled by clean_data().
df['log_CT_CS'] = np.log(df['CT_CS'])
df['log_CT_MCR'] = np.log(df['CT_MCR'])

# 1e-3 * CT_Temp * (ln(CT_RT) + 25), evaluated row by row.  In the code
# visible here this column is excluded from the features and is not used as
# the target (y is CT_RT).
df['LMP_Model'] = df.apply(lambda x: 1e-3 * x['CT_Temp'] *
                           (np.log(x['CT_RT']) + 25), axis=1)

# Feature columns: everything except the identifier, the target CT_RT, the
# raw CT_CS / CT_MCR columns (their logs are kept) and LMP_Model.
features = [i for i in df.columns if i not in
            ['CT_RT', 'CT_CS', 'ID', 'CT_MCR', 'LMP_Model']]

X = df[features].to_numpy(np.float64)
y = df['CT_RT'].to_numpy(np.float64)

# Per-row [ID, CT_RT, CT_Temp, CT_CS] records handed to ProcessData next to
# X and y.
y2 = df[['ID', 'CT_RT', 'CT_Temp', 'CT_CS']].values.tolist()

pdata = ProcessData(X=X, y=y, y2=y2, features=features)
pdata.clean_data(scale_strategy={'strategy': 'StandardScaler'})
data = pdata.get_data()
del pdata

# The text below sits inside a triple-quoted string, i.e. it is not executed.
''' parameters_grid = {'boosting_type': ['gbdt', 'goss'], 'num_leaves': [100, 200], 'max_depth': [-1], 'learning_rate': [0.01], 'n_estimators': [100, 200], 'subsample_for_bin': [200000], 'objective': [None], 'class_weight': [None], 'min_split_gain': [0.0], 'min_child_weight': [0.001],
'Fe', 'C', 'Cr', 'Mn', 'Si', 'Ni', 'Co', 'Mo', 'W', 'Nb', 'Al', 'P', 'Cu', 'Ti', 'Ta', 'Hf', 'Re', 'V', 'B', 'N', 'O', 'S', 'Zr' ] df[ele] = df[ele].fillna(0) df['LMP_Model'] = df.apply(lambda x: 1e-3 * x['CT_Temp'] * (np.log(x['CT_RT']) + C_data[x['ID']]), axis=1) features = [ i for i in df.columns if i not in ['CT_RT', 'CT_Temp', 'ID', 'ID_2', 'CT_CS', 'LMP_Model'] ] X = df[features].to_numpy(np.float32) y = df['LMP_Model'].to_numpy(np.float32) metadata = df[['ID', 'ID_2', 'CT_RT']].values.tolist() pdata = ProcessData(X=X, y=y, features=features, metadata=metadata) pdata.clean_data() data = pdata.get_data() del pdata ''' parameters_grid = {'boosting_type': ['gbdt', 'goss'], 'num_leaves': [100, 200], 'max_depth': [-1], 'learning_rate': [0.01], 'n_estimators': [100, 200], 'subsample_for_bin': [200000], 'objective': [None], 'class_weight': [None], 'min_split_gain': [0.0], 'min_child_weight': [0.001], 'min_child_samples': [20],