def LASSO_implementation(alpha, train_frame, relevant_vec, stat_vec):
    """Forward-select features with LASSO fits, then predict on the test frame.

    Starting from the ``x_Al`` column, every name in ``stat_vec["NAME"]`` is
    tentatively merged into the current feature frame. A LASSO model is fitted
    with and without the candidate through ``dm_tools.lasso`` and both results
    are handed to ``statistics.z_test``, whose return value supplies the
    feature frame and model carried into the next round.

    ``predict_E`` (target column label) and ``test_frame`` are not parameters;
    they are resolved from the enclosing module scope.

    Args:
        alpha: Regularisation strength forwarded to ``dm_tools.lasso``.
        train_frame: Training data containing ``id``, ``x_Al``, every
            candidate column and the ``predict_E`` column.
        relevant_vec: Column labels used to slice ``test_frame`` for the final
            prediction; also passed to ``statistics.z_test`` (presumably
            updated there when a candidate is accepted -- TODO confirm).
        stat_vec: Object whose ``"NAME"`` entry iterates candidate column
            names.

    Returns:
        Tuple ``(predictions, model)`` with the test-set predictions and the
        model returned by the last ``statistics.z_test`` call.

    Raises:
        ValueError: If ``stat_vec["NAME"]`` offers no candidate other than
            ``x_Al``, so no model was ever fitted.
    """
    # Baseline feature, chosen from the correlation scatter graphs.
    curr_df = train_frame.loc[:, ["id", "x_Al"]]
    # Stays None until at least one candidate has been evaluated.
    model = None

    for c in stat_vec["NAME"]:
        if c == "x_Al":
            continue  # already part of the baseline frame

        target = train_frame[predict_E]
        temp_df = pd.merge(curr_df, train_frame.loc[:, ["id", c]], how='outer')

        # "id" is only a join key; it must not be fed to the model.
        curr_var, model1 = dm_tools.lasso(
            curr_df.drop(["id"], axis=1), target, alpha=alpha)
        test_var, model2 = dm_tools.lasso(
            temp_df.drop(["id"], axis=1), target, alpha=alpha)

        curr_df, model = statistics.z_test(
            curr_var, test_var, defines.alpha_f, relevant_vec, c,
            model1, model2, temp_df, curr_df)

    if model is None:
        # Previously this path failed later with an unbound-variable error.
        raise ValueError(
            'stat_vec["NAME"] contains no candidate feature besides "x_Al"; '
            'no model was fitted')

    predictions = model.predict(test_frame.loc[:, relevant_vec])
    return predictions, model
def POLY_implementation(deg, train_frame, relevant_vec, stat_vec):
    """Forward-select features with polynomial fits, then predict on the test frame.

    Starting from the ``x_Al`` column, every name in ``stat_vec["NAME"]`` is
    tentatively merged into the current feature frame. A polynomial model of
    degree ``deg`` is fitted with and without the candidate through
    ``dm_tools.poly`` and both results are handed to ``statistics.z_test``,
    whose return value supplies the feature frame and model carried into the
    next round.

    ``predict_E`` (target column label) and ``test_frame`` are not parameters;
    they are resolved from the enclosing module scope.

    Args:
        deg: Polynomial degree, used for ``dm_tools.poly`` and for expanding
            the test features with ``PolynomialFeatures``.
        train_frame: Training data containing ``id``, ``x_Al``, every
            candidate column and the ``predict_E`` column.
        relevant_vec: Column labels used to slice ``test_frame`` for the final
            prediction; also passed to ``statistics.z_test`` (presumably
            updated there when a candidate is accepted -- TODO confirm).
        stat_vec: Object whose ``"NAME"`` entry iterates candidate column
            names.

    Returns:
        Tuple ``(predictions, model)`` with the test-set predictions and the
        model returned by the last ``statistics.z_test`` call.

    Raises:
        ValueError: If ``stat_vec["NAME"]`` offers no candidate other than
            ``x_Al``, so no model was ever fitted.
    """
    # NOTE(review): the original remark here read "f test on linear
    # regression", but the comparison below calls statistics.z_test.

    # Baseline feature, chosen from the correlation scatter graphs.
    curr_df = train_frame.loc[:, ["id", "x_Al"]]
    # Stays None until at least one candidate has been evaluated.
    model = None

    for c in stat_vec["NAME"]:
        if c == "x_Al":
            continue  # already part of the baseline frame

        target = train_frame[predict_E]
        temp_df = pd.merge(curr_df, train_frame.loc[:, ["id", c]], how='outer')

        # "id" is only a join key; it must not be fed to the model.
        curr_var, model1 = dm_tools.poly(
            curr_df.drop(["id"], axis=1), target, deg=deg)
        test_var, model2 = dm_tools.poly(
            temp_df.drop(["id"], axis=1), target, deg=deg)

        curr_df, model = statistics.z_test(
            curr_var, test_var, defines.alpha_f, relevant_vec, c,
            model1, model2, temp_df, curr_df)

    if model is None:
        # Previously this path failed later with an unbound-variable error.
        raise ValueError(
            'stat_vec["NAME"] contains no candidate feature besides "x_Al"; '
            'no model was fitted')

    # Expand the test features to the same polynomial degree before predicting.
    poly = PolynomialFeatures(deg)
    X_test_transform = poly.fit_transform(test_frame.loc[:, relevant_vec])
    predictions = model.predict(X_test_transform)
    return predictions, model
# NOTE(review): a whole script section is collapsed onto the single physical
# line below. Only the first assignment (the baseline frame holding "id" and
# "x_Al") executes; everything after the first "#" is parsed by Python as a
# comment, so the feature loop, the polynomial prediction and the DataFrame
# construction written after it never run in this form.
# The collapsed text reads `deg`, `predict_E`, `relevant_vec`, `stat_vec` and
# `test_frame`, and appends to `predictions`; none of these are defined on
# this line. It then indexes predictions[0] and predictions[1], so it
# presumably sat inside a loop over the two target columns -- TODO confirm
# against the original script and restore the line breaks and indentation.
curr_df = train_frame.loc[:, ["id", "x_Al" ]] # from coreallation scatter graphs for c in stat_vec["NAME"]: if c == "x_Al": continue temp_df = pd.merge(curr_df, train_frame.loc[:, ["id", c]], how='outer') curr_var, model1 = dm_tools.poly(curr_df.drop(["id"], axis=1), train_frame[predict_E], deg=deg) test_var, model2 = dm_tools.poly(temp_df.drop(["id"], axis=1), train_frame[predict_E], deg=deg) curr_df, model = statistics.z_test(curr_var, test_var, defines.alpha_f, relevant_vec, c, model1, model2, temp_df, curr_df) poly = PolynomialFeatures(deg) X_test_transform = poly.fit_transform(test_frame.loc[:, relevant_vec]) predictions.append(model.predict(X_test_transform)) ex_dic = { 'id': range(1, 601), 'formation_energy_ev_natom': predictions[0], 'bandgap_energy_ev': predictions[1] } col = ['id', 'formation_energy_ev_natom', 'bandgap_energy_ev'] df = pd.DataFrame(ex_dic, columns=col)