コード例 #1
0
# 7108 -2.751719e+06        sef
# 6673 -2.658972e+06    reapply
# 6836 -2.658972e+06   resuming
# 5323 -2.633044e+06  memorable

# Scatter plot of the two spam-probability columns; not very interesting, so it is left commented out.
# sms.plot.scatter(x='tfidf_lda_spam_prob', y='pca_lda_spam_prob')
# plt.tight_layout()
# plt.show()

# Run the sentiment analyzer over every message text; each call yields one
# score record, so `scores` has one row per message, in message order.
vader = SentimentIntensityAnalyzer()
scores = pd.DataFrame(list(map(vader.polarity_scores, sms.text)))
# Only the 'compound' score is copied onto `sms`.
sms['vader'] = scores['compound']
# Boolean selector for the messages whose compound score is nonzero.
mask = sms['vader'] != 0

# Fit a LinearRegressor of the 'vader' score on the 'topic4' component.
# fit() hands back the model object that is used for prediction below.
line = LinearRegressor().fit(pca_topic_vectors['topic4'], sms['vader'])
# Store the fitted line's predictions next to the observed scores.
sms['line'] = line.predict(pca_topic_vectors['topic4'])

# Fit the same one-feature regression with SGDRegressor.
# The `n_iter` keyword was deprecated in scikit-learn 0.19 and removed in
# 0.21, so passing it raises TypeError on current releases. `max_iter` is its
# replacement; `tol=None` turns off the tolerance-based stopping criterion so
# the optimizer still runs a fixed 200 epochs, as `n_iter=200` did.
sgd = SGDRegressor(max_iter=200, tol=None)
# Double brackets keep the single feature two-dimensional (one column), which
# is the shape estimators expect for X (assuming `pca_topic_vectors` is a
# pandas DataFrame -- confirm against where it is built).
sgd = sgd.fit(pca_topic_vectors[['topic4']], scores['compound'])
sms['sgd'] = sgd.predict(pca_topic_vectors[['topic4']])

# Linear rescale p -> 2p - 1 of the spam probability column. Presumably p
# lies in [0, 1], which would put the result in [-1, 1] -- confirm upstream.
sms['pca_lda_spaminess'] = sms['pca_lda_spam_prob'] * 2 - 1


class OneNeuronRegressor:
    def __init__(self, n_inputs=1, n_iter=1000, alpha=0.1):
        self.n_inputs = n_inputs
        self.n_outputs = 1
        self.W1 = np.random.randn(self.n_outputs, self.n_inputs + 1)
コード例 #2
0

##########################

from sklearn.preprocessing import StandardScaler

# Standardize each listed column of `sms` in place. The single scaler is
# re-fit on every pass, so each column is scaled with its own statistics and,
# once the loop ends, `scaler` holds the fit for the last column only.
scaler = StandardScaler()
for col in ['pca_lda_spam_prob', 'vader', 'topic4']:
    # Double brackets pass a one-column 2-D selection to fit_transform.
    sms.loc[:, col] = scaler.fit_transform(sms[[col]])


##########################

from nlpia.models import LinearRegressor

# Regress the 'vader' column on the 'topic4' column; fit() hands back the
# model object that is used for the slope and the predictions below.
line = LinearRegressor().fit(sms['topic4'], sms['vader'])
# Report the fitted slope to four decimal places.
print('{:.4f}'.format(line.slope))
# recorded output: 0.29

# Keep the fitted line's predictions alongside the data.
sms['line'] = line.predict(sms['topic4'])


##########################

from sklearn.linear_model import SGDRegressor

# Fit the same one-feature regression by stochastic gradient descent.
# The `n_iter` keyword was deprecated in scikit-learn 0.19 and removed in
# 0.21, so passing it raises TypeError on current releases. `max_iter` is its
# replacement; `tol=None` turns off the tolerance-based stopping criterion so
# all 20000 epochs still run, as `n_iter=20000` did.
sgd = SGDRegressor(max_iter=20000, tol=None)
# Double brackets keep the single feature as a one-column 2-D selection for X.
sgd = sgd.fit(sms[['topic4']], sms['vader'])
# Print the learned coefficient for the single feature to four decimals.
print('{:.4f}'.format(sgd.coef_[0]))
# 0.2930