Beispiel #1
0
# In[2]:


pip install imblearn

from imblearn import under_sampling 
from imblearn import over_sampling
from imblearn.over_sampling import SMOTE
# In[40]:


# Oversample with SMOTE. `fit_resample` is the supported sampler entry point;
# the older `fit_sample` alias is no longer available in recent
# imbalanced-learn releases. A fixed random_state keeps the synthetic samples
# identical across runs.
# NOTE(review): this resamples the full X / y before the train/test split that
# happens later in this file, so synthetic rows may be interpolated from rows
# that end up in the test set. Consider splitting first and resampling only
# the training portion.
X_resample, y_resample = SMOTE(random_state=42).fit_resample(X, y.values.ravel())
# Wrap both results in DataFrames (y was passed as a 1-D array, so y_resample
# becomes a single-column frame).
X_resample = pd.DataFrame(X_resample)
y_resample = pd.DataFrame(y_resample)
X_resample.head()


# In[ ]:


from imblearn.over_sampling import SMOTE
from imblearn import under_sampling, over_sampling
from sklearn.model_selection import train_test_split


# In[41]:


# Split the resampled data into training (70%) and test (30%) sets.
# A fixed random_state makes the shuffle, and therefore the split, reproducible
# across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X_resample, y_resample, test_size=0.3, random_state=42
)
Beispiel #2
0
    for ele in df.columns:
        if sum(np.isnan(df[ele])) > 0:
            np.nan_to_num(df[ele], 0)

    return df
error_detector(df_pl.loc[:,df_pl.columns!='y'])

# Try SMOTE and ADASYN with Complement Naive Bayes
from imblearn.over_sampling import SMOTE, ADASYN
from sklearn.naive_bayes import ComplementNB
import numpy as np

# Single seed shared by the split and both samplers so a re-run reproduces the
# same partition and the same synthetic samples.
RANDOM_STATE = 42


def _oversample_fit_predict(sampler, X_train, y_train, X_test):
    """Oversample the training data, fit ComplementNB on it and predict X_test.

    Parameters
    ----------
    sampler : object exposing ``fit_resample(X, y)`` (e.g. SMOTE, ADASYN).
    X_train, y_train : training features / labels; only these are resampled.
    X_test : held-out features passed to ``predict`` unchanged.

    Returns
    -------
    tuple
        ``(model, X_resampled, y_resampled, y_pred)`` where ``y_pred`` is the
        thresholded 0/1 prediction array for ``X_test``.
    """
    X_res, y_res = sampler.fit_resample(X_train, y_train)
    print(X_res.head(), y_res.head())
    model = ComplementNB()
    model.fit(X_res, y_res)
    y_pred = model.predict(X_test)
    # predict() already yields class labels; the threshold re-encodes them as
    # 0/1 integers. Assumes the labels are numeric 0/1 -- TODO confirm.
    y_pred = np.where(y_pred > 0.5, 1, 0)
    return model, X_res, y_res, y_pred


X, y = df_pl.loc[:, df_pl.columns != 'y'], df_pl['y']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# SMOTE-balanced training set.
cnb, X_resampled, y_resampled, y_pred_cnb = _oversample_fit_predict(
    SMOTE(random_state=RANDOM_STATE), X_train, y_train, X_test
)

# ADASYN-balanced training set. As in the original sequence, `cnb`,
# `X_resampled` and `y_resampled` end up bound to the ADASYN results.
cnb, X_resampled, y_resampled, y_pred_adasyn = _oversample_fit_predict(
    ADASYN(random_state=RANDOM_STATE), X_train, y_train, X_test
)

# Try penalized SVM
from sklearn import svm