# In[2]:
# Notebook setup step (shell command, not valid Python in a plain script):
#   pip install imblearn
from imblearn import over_sampling
from imblearn import under_sampling
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# In[41]:
# Split the data into training and test (0.3) BEFORE oversampling.
# Oversampling first and splitting afterwards lets synthetic points that were
# interpolated from test rows end up in the training set (data leakage) and
# makes the test set artificially balanced, so scores come out too optimistic.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(X, y, test_size=0.3)

# In[40]:
# Oversample the minority class on the training portion only.
# `fit_resample` replaces `fit_sample`, which newer imbalanced-learn releases
# no longer provide.
X_resample, y_resample = SMOTE().fit_resample(
    X_train_raw, y_train_raw.values.ravel()
)
X_resample = pd.DataFrame(X_resample)
y_resample = pd.DataFrame(y_resample)
X_resample.head()

# The model is trained on the balanced data; X_test / y_test stay untouched,
# real-distribution samples.
X_train, y_train = X_resample, y_resample
for ele in df.columns: if sum(np.isnan(df[ele])) > 0: np.nan_to_num(df[ele], 0) return df error_detector(df_pl.loc[:,df_pl.columns!='y']) # Try SMOTE and ADASYN with Complement Naive Bayes from imblearn.over_sampling import SMOTE, ADASYN from sklearn.naive_bayes import ComplementNB import numpy as np X, y = df_pl.loc[:, df_pl.columns != 'y'], df_pl['y'] X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2) X_resampled, y_resampled = SMOTE().fit_resample(X_train, y_train) print(X_resampled.head(), y_resampled.head()) cnb = ComplementNB() cnb.fit(X_resampled, y_resampled) y_pred_cnb = cnb.predict(X_test) y_pred_cnb = np.where(y_pred_cnb > 0.5, 1, 0) X_resampled, y_resampled = ADASYN().fit_resample(X_train, y_train) print(X_resampled.head(), y_resampled.head()) cnb = ComplementNB() cnb.fit(X_resampled, y_resampled) y_pred_adasyn = cnb.predict(X_test) y_pred_adasyn = np.where(y_pred_adasyn > 0.5, 1, 0) # Try penalized SVM from sklearn import svm