# 7108   -2.751719e+06   sef
# 6673   -2.658972e+06   reapply
# 6836   -2.658972e+06   resuming
# 5323   -2.633044e+06   memorable
# Not very interesting
# sms.plot.scatter(x='tfidf_lda_spam_prob', y='pca_lda_spam_prob')
# plt.tight_layout()
# plt.show()

# Assumed import for the VADER analyzer used below; the standalone
# `vaderSentiment` package provides the same class.
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nlpia.models import LinearRegressor
from sklearn.linear_model import SGDRegressor

vader = SentimentIntensityAnalyzer()
scores = pd.DataFrame([vader.polarity_scores(text) for text in sms.text])
sms['vader'] = scores['compound']
mask = sms.vader != 0

line = LinearRegressor()
line = line.fit(pca_topic_vectors['topic4'], sms['vader'])
sms['line'] = line.predict(pca_topic_vectors['topic4'])

sgd = SGDRegressor(n_iter=200)  # `n_iter` became `max_iter` in scikit-learn >= 0.21
sgd = sgd.fit(pca_topic_vectors[['topic4']], scores['compound'])
sms['sgd'] = sgd.predict(pca_topic_vectors[['topic4']])

# Rescale the spam probability from [0, 1] to [-1, 1] so it is on the same
# scale as the VADER compound score.
sms['pca_lda_spaminess'] = 2 * sms.pca_lda_spam_prob - 1


class OneNeuronRegressor:

    def __init__(self, n_inputs=1, n_iter=1000, alpha=0.1):
        self.n_inputs = n_inputs
        self.n_outputs = 1
        self.W1 = np.random.randn(self.n_outputs, self.n_inputs + 1)
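        # Assumed continuation of the truncated class definition above: store
        # the remaining hyperparameters and train the neuron with plain batch
        # gradient descent on squared error. This is a sketch of what the
        # class name and signature imply, not necessarily the original code.
        self.n_iter = n_iter
        self.alpha = alpha

    def add_bias(self, X):
        # Prepend a column of ones so the first column of W1 acts as the bias.
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return np.hstack([np.ones((len(X), 1)), X])

    def predict(self, X):
        # Linear "activation": y_hat = [1, x] . W1.T
        return self.add_bias(X).dot(self.W1.T)

    def fit(self, X, y):
        X = self.add_bias(X)
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        for _ in range(self.n_iter):
            error = X.dot(self.W1.T) - y              # shape (n_samples, 1)
            gradient = 2. * error.T.dot(X) / len(X)   # d(MSE)/d(W1)
            self.W1 = self.W1 - self.alpha * gradient
        return self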
##########################
# Standardize the features so the fitted coefficients are comparable.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
for col in ['pca_lda_spam_prob', 'vader', 'topic4']:
    sms.loc[:, col] = scaler.fit_transform(sms[[col]])

##########################
# Simple linear regression of VADER sentiment on topic 4
# (nlpia's LinearRegressor exposes the fitted slope directly).
from nlpia.models import LinearRegressor

line = LinearRegressor()
line = line.fit(sms['topic4'], sms['vader'])
print('{:.4f}'.format(line.slope))
# 0.29
sms['line'] = line.predict(sms['topic4'])

##########################
# Stochastic gradient descent converges to nearly the same coefficient.
from sklearn.linear_model import SGDRegressor

sgd = SGDRegressor(n_iter=20000)  # `n_iter` became `max_iter` in scikit-learn >= 0.21
sgd = sgd.fit(sms[['topic4']], sms['vader'])
print('{:.4f}'.format(sgd.coef_[0]))
# 0.2930
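##########################
# Hypothetical usage of the OneNeuronRegressor sketched above (not part of
# the original listing): fit the single neuron on the same standardized
# feature and compare its learned weight to the SGDRegressor coefficient.
# The exact value depends on the random weight initialization.
nn = OneNeuronRegressor(n_inputs=1, n_iter=1000, alpha=0.1)
nn = nn.fit(sms[['topic4']], sms['vader'])
print('{:.4f}'.format(nn.W1[0, 1]))  # weight on topic4; nn.W1[0, 0] is the bias
sms['neuron'] = nn.predict(sms[['topic4']])[:, 0]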