import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
from sklearn import linear_model
import statsmodels.api as sm
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Script: load dataset4.csv, train an XGBoost classifier to predict
# 'Sustainability_Index' from the remaining columns, and report the
# accuracy measured on the held-out test split.

# NOTE(review): the scaler is created but never applied in this section;
# kept in case later code relies on the module-level name.
scaler = StandardScaler()

# Column labels assigned to the CSV on load; the last one is the target.
names = [
    'Company_proficiency',
    'Time_per_Unit',
    'Innovation_Level',
    'Quality',
    'Stats',
    'Sustainability_Index',
]

# NOTE(review): passing names= makes pandas treat the first row as data;
# confirm the file really has no header row.
dataset = pd.read_csv("dataset4.csv", names=names)

# Quick look at the data: first rows, per-column statistics, dimensions.
print(dataset.head())
print(dataset.describe().transpose())
print(dataset.shape)

# Separate the features from the target column.
X = dataset.drop('Sustainability_Index', axis=1)
Y = dataset['Sustainability_Index']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

# The model has to be fitted before it can predict, and the predictions
# must come from X_test so they line up one-to-one with Y_test.
model = XGBClassifier()
model.fit(X_train, Y_train)
predictions = model.predict(X_test)

accuracy = accuracy_score(Y_test, predictions)
# XGBClassifier has no summary() method; report the computed accuracy.
print(f"Accuracy: {accuracy:.4f}")
# Fit the logit model and print its extended summary.
# (logit_model is expected to be defined earlier in the file.)
results = logit_model.fit()
print(results.summary2())

# Column names for reference above
# One-column frame listing every predictor (all columns except 'Attrition'),
# re-indexed from 1 instead of 0.
df_column_name = pd.DataFrame(list(df_full_data.drop(['Attrition'], axis=1).columns.values))
df_column_name.index = np.arange(1, len(df_column_name) + 1)
df_column_name

import statsmodels.api as sm
import statsmodels.formula.api as smf

#logit_model=smf.Logit(y_train, X_train)
#results=logit_model.fit()
#print(results.summary2())

# Formula-based logistic regression of Attrition on the listed predictors.
model = smf.logit(
    formula="Attrition~ Age + DailyRate + DistanceFromHome + EnvironmentSatisfaction + JobInvolvement + JobSatisfaction + NumCompaniesWorked + RelationshipSatisfaction + TotalWorkingYears + TrainingTimesLastYear + WorkLifeBalance + YearsAtCompany + MaritalStatus_Divorced + MaritalStatus_Married + MaritalStatus_Single + OverTime_No + OverTime_Yes",
    data=df_full_data,
).fit()
model.summary()

# GETTING THE ODDS RATIOS, Z-VALUE, P-VALUE, AND 95% CI
# Exponentiating the coefficients and their confidence bounds turns
# log-odds into odds ratios.
model_odds = pd.DataFrame(np.exp(model.params), columns=['OR'])
# The 'z-value' column used to be filled with model.pvalues; it now holds
# the z statistics (tvalues), and the p-values get their own column.
model_odds['z-value'] = model.tvalues
model_odds['p-value'] = model.pvalues
model_odds[['2.5%', '97.5%']] = np.exp(model.conf_int())
model_odds

"""# Model 6: Neural Network"""

# Random seeds
# Seed each random number generator with the same fixed value.
# NOTE(review): tf.set_random_seed is the TensorFlow 1.x spelling (TF 2.x
# uses tf.random.set_seed) - confirm against the installed version.
np.random.seed(123)
rn.seed(123)
tf.set_random_seed(123)

# Convert Attrition to one-hot encoding for NN to be able to read