Example #1
0
# Bar chart of the row count per category, first for the subjectivity
# column and then for the sentiment column.
# NOTE(review): no explicit `ax` is passed, so when both calls run together
# they may draw onto the same axes -- confirm the charts render as intended.
for _category_col in ('subjcat', 'sentcat'):
    _category_counts = df[_category_col].value_counts()
    _category_counts.plot(kind='bar')

# Keep only the rows whose sentiment category is 'positive' or 'negative'.
_is_polar = df['sentcat'].isin(['positive', 'negative'])
df = df[_is_polar]


# In[26]:


# BUILDING THE CLASSIFIERS
# Imports used by this section and by the model experiments that follow it.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

# ENCODING THE LABELS
# Map the values of labeled["emotions"] to integer codes.
# NOTE(review): the codes are written to `filtered` while the labels are read
# from `labeled`; `filtered` is not defined in this section -- confirm it is a
# DataFrame aligned with `labeled` (or whether `labeled` was meant).
le = LabelEncoder()
filtered["emotion_cat"] = le.fit_transform(labeled["emotions"])

# Convert to list and fit / max features.
# NOTE(review): `tfidfconverter` (max_features=30000, min_df=7, max_df=0.8,
# English stop words) is configured but never used in this section; the
# default-configured `tfidf` performs the transform below. Confirm which
# vectorizer is intended.
tfidf = TfidfVectorizer()
tfidfconverter = TfidfVectorizer(
    max_features=30000,
    min_df=7,
    max_df=0.8,
    stop_words=stopwords.words('english'),
)

# The vectorizer method is `fit_transform`; the previous spelling
# (`fit_transfrorm`) does not exist and raised AttributeError.
# NOTE(review): fit_transform returns one sparse document-term matrix for the
# whole corpus, not one value per row, and it is computed from `df` but stored
# on `labeled`. Assigning it to a DataFrame column may fail or not give a
# usable per-row feature column -- confirm, and consider keeping the matrix in
# its own variable instead.
labeled['transformed_tweet'] = tfidf.fit_transform(df['filtered'])
myset = labeled[['emotions', 'transformed_tweet']].copy()

# Hold out 20% of the samples for testing; random_state=0 keeps the split
# reproducible.
# NOTE(review): `X` and `y` are not assigned anywhere in this section --
# presumably the TF-IDF features and the encoded emotion labels; TODO confirm
# they are defined before this point.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# TODO: don't forget to time the training / prediction steps.

# Use a decision-tree *classifier*: the predictions are scored with a confusion
# matrix, which compares discrete class labels, whereas DecisionTreeRegressor
# produces continuous outputs.
classifier1 = DecisionTreeClassifier()
classifier1.fit(X_train, y_train)
y_pred = classifier1.predict(X_test)
cm = confusion_matrix(y_test, y_pred)